diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 298a0fea6..650d77049 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -31,7 +31,6 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // use flat priors for GLs DiploidGenotypePriors priors = new DiploidGenotypePriors(); - int index = 0; for ( String sample : contexts.keySet() ) { StratifiedAlignmentContext context = contexts.get(sample); ReadBackedPileup pileup = context.getContext(contextType).getPileup(); @@ -47,12 +46,11 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); for ( char alt : BaseUtils.BASES ) { if ( alt != ref ) { - DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref)); + DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt); DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); - AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index); + AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], sample); } } - index++; } } @@ -74,14 +72,31 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul } } - protected List makeGenotypeCalls(char ref, char alt, Map contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, int frequency, Map contexts, GenomeLoc loc) { ArrayList calls = new ArrayList(); + // set up some variables we'll need in the loop + AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt); + DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); + DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); + for ( String sample : GLs.keySet() ) { // create the call GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); + // set the genotype and confidence + Pair AFbasedGenotype = matrix.getGenotype(frequency, sample); + call.setNegLog10PError(AFbasedGenotype.second); + if ( AFbasedGenotype.first == GenotypeType.REF.ordinal() ) + call.setGenotype(refGenotype); + else if ( AFbasedGenotype.first == GenotypeType.HET.ordinal() ) + call.setGenotype(hetGenotype); + else // ( AFbasedGenotype.first == GenotypeType.HOM.ordinal() ) + call.setGenotype(homGenotype); + + if ( call instanceof ReadBacked ) { ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); ((ReadBacked)call).setPileup(pileup); @@ -105,12 +120,17 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul return calls; } + protected class AlleleFrequencyMatrix { - private double[][] matrix; - private int[] indexes; - private int N; - private int frequency; + private double[][] matrix; // allele frequency matrix + private int[] indexes; // matrix to maintain which genotype is active + private int N; // total frequencies + private int frequency; // current frequency + + // data structures necessary to maintain a list of the best genotypes and their scores + private ArrayList samples = new ArrayList(); + private HashMap>> samplesToGenotypesPerAF = new HashMap>>(); public AlleleFrequencyMatrix(int N) { this.N = N; @@ -121,7 +141,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul indexes[i] = 0; } - public void setLikelihoods(double AA, double AB, double BB, int index) { + public void setLikelihoods(double AA, double AB, double BB, String sample) { + int index = samples.size(); + samples.add(sample); matrix[index][GenotypeType.REF.ordinal()] = AA; matrix[index][GenotypeType.HET.ordinal()] = AB; matrix[index][GenotypeType.HOM.ordinal()] = BB; @@ -169,17 +191,48 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul for (int i = 0; i < N; i++) likelihoods += matrix[i][indexes[i]]; - //verboseWriter.write(frequency + "\n"); - //for (int i = 0; i < N; i++) { - // for (int j=0; j < 3; j++) { - // verboseWriter.write(String.valueOf(matrix[i][j])); - // verboseWriter.write(indexes[i] == j ? "* " : " "); - // } - // verboseWriter.write("\n"); - //} - //verboseWriter.write(likelihoods + "\n\n"); + /* + System.out.println(frequency); + for (int i = 0; i < N; i++) { + for (int j=0; j < 3; j++) { + System.out.print(String.valueOf(matrix[i][j])); + System.out.print(indexes[i] == j ? "* " : " "); + } + System.out.println(); + } + System.out.println(likelihoods); + System.out.println(); + */ + + recordGenotypes(); return likelihoods; } + + public Pair getGenotype(int frequency, String sample) { + return samplesToGenotypesPerAF.get(frequency).get(sample); + } + + private void recordGenotypes() { + HashMap> samplesToGenotypes = new HashMap>(); + + int index = 0; + for ( String sample : samples ) { + int genotype = indexes[index]; + + double score; + if ( genotype == GenotypeType.REF.ordinal() ) + score = matrix[index][GenotypeType.REF.ordinal()] - Math.max(matrix[index][GenotypeType.HET.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]); + else if ( genotype == GenotypeType.HET.ordinal() ) + score = matrix[index][GenotypeType.HET.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]); + else // ( genotype == GenotypeType.HOM.ordinal() ) + score = matrix[index][GenotypeType.HOM.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HET.ordinal()]); + + samplesToGenotypes.put(sample, new Pair(genotype, Math.abs(score))); + index++; + } + + samplesToGenotypesPerAF.put(frequency, samplesToGenotypes); + } } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index d513b1b20..3b4bca690 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -96,6 +96,14 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); + // set the genotype and confidence + double[] posteriors = GLs.get(sample).getPosteriors(); + Integer sorted[] = Utils.SortPermutation(posteriors); + DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]); + call.setGenotype(bestGenotype); + if ( call instanceof ReadBacked ) { ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); ((ReadBacked)call).setPileup(pileup); @@ -107,7 +115,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods()); } if ( call instanceof PosteriorsBacked ) { - ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); + ((PosteriorsBacked)call).setPosteriors(posteriors); } calls.add(call); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index e252b4834..dd44cbff0 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -299,7 +299,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println(); } - protected List makeGenotypeCalls(char ref, char alt, Map contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, int frequency, Map contexts, GenomeLoc loc) { // by default, we return no genotypes return new ArrayList(); } @@ -329,7 +329,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return new Pair>(null, null); // populate the sample-specific data - List calls = makeGenotypeCalls(ref, bestAlternateAllele, contexts, loc); + List calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc); // next, the general locus data // *** note that calculating strand bias involves overwriting data structures, so we do that last diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 3500468b0..39b486bf7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -43,9 +43,9 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); // find the index of the best genotype - double[] posteriors = discoveryGL.second.getNormalizedPosteriors(); - Integer sortedPosteriors[] = Utils.SortPermutation(posteriors); - int bestIndex = sortedPosteriors[sortedPosteriors.length - 1]; + double[] normPosteriors = discoveryGL.second.getNormalizedPosteriors(); + Integer sortedNormPosteriors[] = Utils.SortPermutation(normPosteriors); + int bestIndex = sortedNormPosteriors[sortedNormPosteriors.length - 1]; // flag to determine if ref is the best call (not necessary in genotype mode) boolean bestIsRef = false; @@ -53,11 +53,11 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // calculate the phred-scaled confidence score double phredScaledConfidence; if ( GENOTYPE_MODE ) { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - posteriors[bestIndex]); + phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - normPosteriors[bestIndex]); } else { int refIndex = DiploidGenotype.createHomGenotype(ref).ordinal(); bestIsRef = (refIndex == bestIndex); - double pError = (bestIsRef ? 1.0 - posteriors[refIndex] : posteriors[refIndex]); + double pError = (bestIsRef ? 1.0 - normPosteriors[refIndex] : normPosteriors[refIndex]); phredScaledConfidence = QualityUtils.phredScaleErrorRate(pError); } @@ -68,6 +68,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // we can now create the genotype call object GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); + // set the genotype and confidence + double[] posteriors = discoveryGL.second.getPosteriors(); + Integer sorted[] = Utils.SortPermutation(posteriors); + DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]); + call.setGenotype(bestGenotype); + if ( call instanceof ReadBacked ) { ((ReadBacked)call).setPileup(discoveryGL.first); } @@ -78,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((LikelihoodsBacked)call).setLikelihoods(discoveryGL.second.getLikelihoods()); } if ( call instanceof PosteriorsBacked ) { - ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); + ((PosteriorsBacked)call).setPosteriors(posteriors); } VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index f02789b9a..f4a7001d3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -88,10 +88,9 @@ public class UnifiedArgumentCollection { @Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false) public double MINIMUM_ALLELE_FREQUENCY = 1e-8; - // Mark's filtering arguments - //@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) + @Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) public Integer MIN_BASE_QUALTY_SCORE = -1; - //@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) + @Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) public Integer MIN_MAPPING_QUALTY_SCORE = -1; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java index 72adbbcbb..c81f4c33b 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java @@ -8,7 +8,7 @@ package org.broadinstitute.sting.utils.genotype; *

* A genotype that can have only one of 3 genotypes AA,AB,BB */ -public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsBacked { +public abstract class AlleleConstrainedGenotype implements Genotype { protected final static char NO_CONSTRAINT = 'N'; @@ -36,6 +36,7 @@ public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsB } /** + * * @return returns the best genotype */ protected abstract DiploidGenotype getBestGenotype(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java index 41d9fee07..51652d0cf 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/DiploidGenotype.java @@ -44,13 +44,7 @@ public enum DiploidGenotype { } public boolean isHet() { - switch (this) { - case AA: - case CC: - case GG: - case TT: return false; - default: return true; - } + return base1 != base2; } /** @@ -71,14 +65,16 @@ public enum DiploidGenotype { /** * get the genotype, given a string of 2 chars which may not necessarily be ordered correctly - * @param genotype the string representation + * @param base1 base1 + * @param base2 base2 * @return the diploid genotype */ - public static DiploidGenotype unorderedValueOf(String genotype) { - if ( genotype == null || genotype.length() != 2 ) - throw new IllegalArgumentException("Diploid genotypes are represented by 2 characters"); - if ( genotype.charAt(0) > genotype.charAt(1) ) - genotype = String.format("%c%c", genotype.charAt(1), genotype.charAt(0)); - return valueOf(genotype); + public static DiploidGenotype unorderedValueOf(char base1, char base2) { + if ( base1 > base2 ) { + char temp = base1; + base1 = base2; + base2 = temp; + } + return valueOf(String.format("%c%c", base1, base2)); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java index 6f037bdcc..cd65bac39 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java @@ -12,8 +12,20 @@ public interface GenotypeCall extends Genotype { /** * - * @param variation the Variation object associated with this genotype + * @param variation the Variation object associated with this genotype */ public void setVariation(Variation variation); + /** + * + * @param genotype the genotype + */ + public void setGenotype(DiploidGenotype genotype); + + /** + * + * @param value -log10PEror + */ + public void setNegLog10PError(double value); + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java index 81212c3b8..cec691b6b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java @@ -4,8 +4,8 @@ package org.broadinstitute.sting.utils.genotype; * @author aaron * Interface PosteriorsBacked * - * this interface indicates that the genotype is - * backed up by genotype posterior information. + * This interface indicates that the genotype is backed up by *diploid* genotype posterior information. + * The posteriors array is expected to have 10 entries (one for each of the possible diploid genotypes). */ public interface PosteriorsBacked { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index e9d803926..cca345c32 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -82,6 +82,14 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot this.mVariation = variation; } + public void setGenotype(DiploidGenotype genotype) { + ; // do nothing: geli uses diploid posteriors to calculate the genotype + } + + public void setNegLog10PError(double value) { + ; // do nothing: geli uses diploid posteriors to calculate the P(error) + } + @Override public boolean equals(Object other) { lazyEval(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index d55da57b9..29f5f1812 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -65,6 +65,10 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac mGenotype = genotype; } + public void setGenotype(DiploidGenotype genotype) { + setGenotype(genotype.toString()); + } + @Override public boolean equals(Object other) { if (other == null || !(other instanceof GLFGenotypeCall)) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index f59eeb54a..4dbff559a 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; -import java.util.Arrays; - /** * @author ebanks @@ -20,14 +18,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty private ReadBackedPileup mPileup = null; private int mCoverage = 0; - private double[] mPosteriors; + private double mNegLog10PError = -1; private Variation mVariation = null; - // the reference genotype, the best genotype, and the next best genotype, lazy loaded - private DiploidGenotype mRefGenotype = null; - private DiploidGenotype mBestGenotype = null; - private DiploidGenotype mNextGenotype = null; + // the best genotype + private DiploidGenotype mGenotype = null; // the sample name, used to propulate the SampleBacked interface private String mSampleName; @@ -38,44 +34,30 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty mLocation = loc; // fill in empty data - mPosteriors = new double[10]; - Arrays.fill(mPosteriors, Double.MIN_VALUE); + mGenotype = DiploidGenotype.createHomGenotype(ref); mSampleName = ""; } - public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) { + public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) { super(ref); mRefBase = ref; mLocation = loc; - mBestGenotype = DiploidGenotype.unorderedValueOf(genotype); - mRefGenotype = DiploidGenotype.createHomGenotype(ref); - mSampleName = sample; + mGenotype = genotype; + mNegLog10PError = negLog10PError; mCoverage = coverage; - - // set general posteriors to min double value - mPosteriors = new double[10]; - Arrays.fill(mPosteriors, Double.MIN_VALUE); - - // set the ref to PError - mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError; - - // set the best genotype to zero (need to do this after ref in case ref=best) - mPosteriors[mBestGenotype.ordinal()] = 0.0; - - // choose a smart next best genotype and set it to PError - if ( mBestGenotype == mRefGenotype ) - mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype)); - else - mNextGenotype = mRefGenotype; - mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError; + mSampleName = sample; } public void setPileup(ReadBackedPileup pileup) { mPileup = pileup; } - public void setPosteriors(double[] posteriors) { - mPosteriors = posteriors; + public void setGenotype(DiploidGenotype genotype) { + mGenotype = genotype; + } + + public void setNegLog10PError(double negLog10PError) { + mNegLog10PError = negLog10PError; } public void setVariation(Variation variation) { @@ -89,53 +71,21 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty @Override public boolean equals(Object other) { - lazyEval(); - if (other == null) + if ( other == null || !(other instanceof VCFGenotypeCall) ) return false; - if (other instanceof VCFGenotypeCall) { - VCFGenotypeCall otherCall = (VCFGenotypeCall) other; - if (!this.mBestGenotype.equals(otherCall.mBestGenotype)) - return false; - return (this.mRefBase == otherCall.mRefBase && this.mLocation.equals(otherCall.mLocation)); - } - return false; + VCFGenotypeCall otherCall = (VCFGenotypeCall) other; + + return mGenotype.equals(otherCall.mGenotype) && + mNegLog10PError == otherCall.mNegLog10PError && + mLocation.equals(otherCall.mLocation) && + mRefBase == otherCall.mRefBase; } public String toString() { - lazyEval(); - return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f", - getLocation(), mBestGenotype, mRefGenotype, mRefBase, getReadCount(), getNegLog10PError()); - } - - private void lazyEval() { - if (mBestGenotype == null) { - char ref = this.getReference(); - char alt = this.getAlternateAllele(); - - mRefGenotype = DiploidGenotype.createHomGenotype(ref); - - // if we are constrained to a single alternate allele, use only that one - if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) { - DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref)); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); - boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()]; - boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; - boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; - if ( hetOverHom ) { - mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype); - mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype)); - } else { - mBestGenotype = (homOverRef ? homGenotype : mRefGenotype); - mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype)); - } - } else { - Integer sorted[] = Utils.SortPermutation(mPosteriors); - mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; - mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; - } - } + return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f", + getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError()); } /** @@ -144,25 +94,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty * @return get the one minus error value */ public double getNegLog10PError() { - return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]); + return mNegLog10PError; } // get the best genotype protected DiploidGenotype getBestGenotype() { - lazyEval(); - return mBestGenotype; - } - - // get the alternate genotype - private DiploidGenotype getNextBest() { - lazyEval(); - return mNextGenotype; - } - - // get the ref genotype - private DiploidGenotype getRefGenotype() { - lazyEval(); - return mRefGenotype; + return mGenotype; } /** @@ -238,11 +175,14 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty public Variation toVariation(char ref) { if ( mVariation == null ) { VCFVariationCall var = new VCFVariationCall(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE); - double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); - var.setConfidence(confidence); - if ( isVariant() ) - var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2)); - mVariation = var; + var.setConfidence(10 * mNegLog10PError); + if ( !mGenotype.isHomRef(ref) ) { + if ( mGenotype.base1 != ref ) + var.addAlternateAllele(Character.toString(mGenotype.base1)); + if ( mGenotype.isHet() && mGenotype.base2 != ref ) + var.addAlternateAllele(Character.toString(mGenotype.base2)); + } + mVariation = var; } return mVariation; } @@ -274,15 +214,6 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty return mRefBase; } - /** - * get the posteriors - * - * @return the posteriors - */ - public double[] getPosteriors() { - return mPosteriors; - } - /** * @return returns the sample name for this genotype */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index c6d029d51..5072626f1 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -42,7 +42,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { private char mReferenceBase; // our location private GenomeLoc mLoc; - // our id; set to '.' if not available + // our id private String mID; // the alternate bases private final List mAlts = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index a382305bd..1ea423556 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -179,6 +179,8 @@ public class VCFUtils { if ( freqsSeen > 0 ) infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen))); + // TODO -- "." and "0" are wrong -- need to use values from the records + return new VCFRecord(params.getReferenceBase(), params.getContig(), params.getPosition(), diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 5dd7dbc4b..4672c88da 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("8f9ee611603dc75c7ca8b15d81000a82")); + Arrays.asList("ad00b255c83fe43213758b280646e4b4")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("a8979577debf9c8d7ea4a40ee1a0f1ef")); + Arrays.asList("e709714e288c508f79c1ffc5cdbe9cca")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("253aba7ce0af7be4104dd275873a541d")); + Arrays.asList("2214983d4ae3a19bc16e7cb89fc3128f")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("1be2ce6ac40bdaf3d8f264f6d73c84ae")); + Arrays.asList("9e98eba56add9989e2d40834efc8c803")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("8f92a56da4bd56b8755e9db89c31bfff")); + Arrays.asList("fe1077636feb41c69f7c4f64147e2bba")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("67913655a8063f4deb9a6138f0844ddd")); + Arrays.asList("6a739a0acb620aeed5d8e0de7b59877a")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testGenotypeModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("d51760ad01195ab299925595c6f62bb4")); + Arrays.asList("85fbcdd816ecc1d9686df244a6e039dd")); executeTest("testGenotypeMode - Joint Estimate", spec); } @@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testAllBasesModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("2322d3ebf09ebf901fbcb653119d0b47")); + Arrays.asList("f871576cf49e7a83a0f6b5580b0178c2")); executeTest("testAllBasesMode - Joint Estimate", spec); }