From f2886d88e04af3d4d6ffb6b4ace1278ce020ff21 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 14 Oct 2009 02:49:56 +0000 Subject: [PATCH] We now emit genotype calls git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1828 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/EMGenotypeCalculationModel.java | 69 +++++++++++++++---- .../gatk/walkers/genotyper/GenotypeCall.java | 17 +++-- ...PointEstimateGenotypeCalculationModel.java | 2 +- 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 2f7948713..c528b27b9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.*; import java.util.*; @@ -48,16 +47,60 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - // TODO -- finish me... + logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore)); - System.out.println(String.format("LOD=%f, SLOD=%f", lod, strandScore)); + // generate the calls + GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); + List calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts); + if ( calls != null && calls.size() != 0 ) { - // make a call - // List calls - //return new GenotypeCall(context.getLocation(), ref,gl, pileup); - //out.addMultiSampleCall((Genotype)calls, GenotypeMetaData metadata); + // use multi-sample mode if we have multiple samples or the output type allows it + if ( out.supportsMultiSample() || samples.size() > 1 ) { - return null; + // annoying hack to get around Java generics + ArrayList callList = new ArrayList(); + for ( GenotypeCall call : calls ) + callList.add(call); + + out.addMultiSampleCall(callList, metadata); + } else { + out.addGenotypeCall(calls.get(0)); + } + } + return calls; + } + + protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { + HashMap GLs = results.getGenotypeLikelihoods(); + + // an optimization + double expectedChromosomes = 2.0 * (double)GLs.size() * results.getMAF(); + if ( expectedChromosomes < 1.0 ) + return null; + + ArrayList calls = new ArrayList(); + int variantCalls = 0; + + for ( String sample : GLs.keySet() ) { + // get the pileup + AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + pileup.setIncludeDeletionsInPileupString(true); + + // create the call + GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup); + calls.add(call); + + // increment the variant count if it's non-ref + if ( call.isVariant() ) + variantCalls++; + } + + // if everyone is ref, don't emit any calls + if ( variantCalls == 0 ) + calls = null; + + return calls; } public EMOutput runEM(char ref, HashMap contexts, DiploidGenotypePriors priors, int[] baseCounts, StratifiedContext contextType) { @@ -212,19 +255,21 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode * A class to keep track of the EM output */ protected class EMOutput { - private double pD, pNull, pF; + private double pD, pNull, pF, MAF; private HashMap GLs; - EMOutput(double pD, double pNull, double pF, HashMap GLs) { + EMOutput(double pD, double pNull, double pF, double MAF, HashMap GLs) { this.pD = pD; this.pNull = pNull; this.pF = pF; + this.MAF = MAF; this.GLs = GLs; } public double getPofD() { return pD; } public double getPofNull() { return pNull; } public double getPofF() { return pF; } + public double getMAF() { return MAF; } public HashMap getGenotypeLikelihoods() { return GLs; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java index de7cabc4b..3559de4a2 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java @@ -13,9 +13,9 @@ import java.util.List; /** * @author aaron *

- * Class SSGenotypeCall + * Class GenotypeCall *

- * The mplementation of the genotype interface, which contains + * The implementation of the genotype interface, which contains * extra information for the various genotype outputs */ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked { @@ -47,7 +47,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like */ public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { mSampleName = sampleName; - mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mRefBase = Character.toUpperCase(refBase); mGenotypeLikelihoods = gtlh; mLocation = location; mPileup = pileup; @@ -64,7 +64,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like */ GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { mSampleName = sampleName; - mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mRefBase = Character.toUpperCase(refBase); mGenotypeLikelihoods = gtlh; mLocation = location; mGenotype = genotype; @@ -209,6 +209,15 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString()); } + /** + * are we a variant? (non-ref) + * + * @return true if we're a variant + */ + public boolean isVariant() { + return isVariant(mRefBase); + } + /** * return this genotype as a variant * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 5195189df..e77839df5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -165,7 +165,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation pNull += p0; logger.debug("Final pD=" + pD + ", pNull=" + pNull); - return new EMOutput(pD, pNull, pF, GLs); + return new EMOutput(pD, pNull, pF, MAF, GLs); } private double compute_pD(HashMap GLs) {