We now emit genotype calls

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1828 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-10-14 02:49:56 +00:00
parent 1b214c0de5
commit f2886d88e0
3 changed files with 71 additions and 17 deletions

View File

@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
@ -48,16 +47,60 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
// TODO -- finish me...
logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
System.out.println(String.format("LOD=%f, SLOD=%f", lod, strandScore));
// generate the calls
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
List<GenotypeCall> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
if ( calls != null && calls.size() != 0 ) {
// make a call
// List<GenotypeCall> calls
//return new GenotypeCall(context.getLocation(), ref,gl, pileup);
//out.addMultiSampleCall((Genotype)calls, GenotypeMetaData metadata);
// use multi-sample mode if we have multiple samples or the output type allows it
if ( out.supportsMultiSample() || samples.size() > 1 ) {
return null;
// annoying hack to get around Java generics
ArrayList<Genotype> callList = new ArrayList<Genotype>();
for ( GenotypeCall call : calls )
callList.add(call);
out.addMultiSampleCall(callList, metadata);
} else {
out.addGenotypeCall(calls.get(0));
}
}
return calls;
}
protected List<GenotypeCall> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
HashMap<String, GenotypeLikelihoods> GLs = results.getGenotypeLikelihoods();
// an optimization
double expectedChromosomes = 2.0 * (double)GLs.size() * results.getMAF();
if ( expectedChromosomes < 1.0 )
return null;
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
int variantCalls = 0;
for ( String sample : GLs.keySet() ) {
// get the pileup
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
pileup.setIncludeDeletionsInPileupString(true);
// create the call
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup);
calls.add(call);
// increment the variant count if it's non-ref
if ( call.isVariant() )
variantCalls++;
}
// if everyone is ref, don't emit any calls
if ( variantCalls == 0 )
calls = null;
return calls;
}
public EMOutput runEM(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, int[] baseCounts, StratifiedContext contextType) {
@ -212,19 +255,21 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
* A class to keep track of the EM output
*/
protected class EMOutput {
private double pD, pNull, pF;
private double pD, pNull, pF, MAF;
private HashMap<String, GenotypeLikelihoods> GLs;
EMOutput(double pD, double pNull, double pF, HashMap<String, GenotypeLikelihoods> GLs) {
EMOutput(double pD, double pNull, double pF, double MAF, HashMap<String, GenotypeLikelihoods> GLs) {
this.pD = pD;
this.pNull = pNull;
this.pF = pF;
this.MAF = MAF;
this.GLs = GLs;
}
public double getPofD() { return pD; }
public double getPofNull() { return pNull; }
public double getPofF() { return pF; }
public double getMAF() { return MAF; }
public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
}

View File

@ -13,9 +13,9 @@ import java.util.List;
/**
* @author aaron
* <p/>
* Class SSGenotypeCall
* Class GenotypeCall
* <p/>
* The mplementation of the genotype interface, which contains
* The implementation of the genotype interface, which contains
* extra information for the various genotype outputs
*/
public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
@ -47,7 +47,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
*/
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mRefBase = Character.toUpperCase(refBase);
mGenotypeLikelihoods = gtlh;
mLocation = location;
mPileup = pileup;
@ -64,7 +64,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
*/
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mRefBase = Character.toUpperCase(refBase);
mGenotypeLikelihoods = gtlh;
mLocation = location;
mGenotype = genotype;
@ -209,6 +209,15 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
}
/**
* are we a variant? (non-ref)
*
* @return true if we're a variant
*/
public boolean isVariant() {
return isVariant(mRefBase);
}
/**
* return this genotype as a variant
*

View File

@ -165,7 +165,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
pNull += p0;
logger.debug("Final pD=" + pD + ", pNull=" + pNull);
return new EMOutput(pD, pNull, pF, GLs);
return new EMOutput(pD, pNull, pF, MAF, GLs);
}
private double compute_pD(HashMap<String, GenotypeLikelihoods> GLs) {