We now emit genotype calls
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1828 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1b214c0de5
commit
f2886d88e0
|
|
@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -48,16 +47,60 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||||
|
|
||||||
// TODO -- finish me...
|
logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
|
||||||
|
|
||||||
System.out.println(String.format("LOD=%f, SLOD=%f", lod, strandScore));
|
// generate the calls
|
||||||
|
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
|
||||||
|
List<GenotypeCall> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
|
||||||
|
if ( calls != null && calls.size() != 0 ) {
|
||||||
|
|
||||||
// make a call
|
// use multi-sample mode if we have multiple samples or the output type allows it
|
||||||
// List<GenotypeCall> calls
|
if ( out.supportsMultiSample() || samples.size() > 1 ) {
|
||||||
//return new GenotypeCall(context.getLocation(), ref,gl, pileup);
|
|
||||||
//out.addMultiSampleCall((Genotype)calls, GenotypeMetaData metadata);
|
|
||||||
|
|
||||||
|
// annoying hack to get around Java generics
|
||||||
|
ArrayList<Genotype> callList = new ArrayList<Genotype>();
|
||||||
|
for ( GenotypeCall call : calls )
|
||||||
|
callList.add(call);
|
||||||
|
|
||||||
|
out.addMultiSampleCall(callList, metadata);
|
||||||
|
} else {
|
||||||
|
out.addGenotypeCall(calls.get(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return calls;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<GenotypeCall> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||||
|
HashMap<String, GenotypeLikelihoods> GLs = results.getGenotypeLikelihoods();
|
||||||
|
|
||||||
|
// an optimization
|
||||||
|
double expectedChromosomes = 2.0 * (double)GLs.size() * results.getMAF();
|
||||||
|
if ( expectedChromosomes < 1.0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
|
||||||
|
int variantCalls = 0;
|
||||||
|
|
||||||
|
for ( String sample : GLs.keySet() ) {
|
||||||
|
// get the pileup
|
||||||
|
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||||
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
|
pileup.setIncludeDeletionsInPileupString(true);
|
||||||
|
|
||||||
|
// create the call
|
||||||
|
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup);
|
||||||
|
calls.add(call);
|
||||||
|
|
||||||
|
// increment the variant count if it's non-ref
|
||||||
|
if ( call.isVariant() )
|
||||||
|
variantCalls++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if everyone is ref, don't emit any calls
|
||||||
|
if ( variantCalls == 0 )
|
||||||
|
calls = null;
|
||||||
|
|
||||||
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
public EMOutput runEM(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, int[] baseCounts, StratifiedContext contextType) {
|
public EMOutput runEM(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, int[] baseCounts, StratifiedContext contextType) {
|
||||||
|
|
@ -212,19 +255,21 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
* A class to keep track of the EM output
|
* A class to keep track of the EM output
|
||||||
*/
|
*/
|
||||||
protected class EMOutput {
|
protected class EMOutput {
|
||||||
private double pD, pNull, pF;
|
private double pD, pNull, pF, MAF;
|
||||||
private HashMap<String, GenotypeLikelihoods> GLs;
|
private HashMap<String, GenotypeLikelihoods> GLs;
|
||||||
|
|
||||||
EMOutput(double pD, double pNull, double pF, HashMap<String, GenotypeLikelihoods> GLs) {
|
EMOutput(double pD, double pNull, double pF, double MAF, HashMap<String, GenotypeLikelihoods> GLs) {
|
||||||
this.pD = pD;
|
this.pD = pD;
|
||||||
this.pNull = pNull;
|
this.pNull = pNull;
|
||||||
this.pF = pF;
|
this.pF = pF;
|
||||||
|
this.MAF = MAF;
|
||||||
this.GLs = GLs;
|
this.GLs = GLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getPofD() { return pD; }
|
public double getPofD() { return pD; }
|
||||||
public double getPofNull() { return pNull; }
|
public double getPofNull() { return pNull; }
|
||||||
public double getPofF() { return pF; }
|
public double getPofF() { return pF; }
|
||||||
|
public double getMAF() { return MAF; }
|
||||||
public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
|
public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,9 @@ import java.util.List;
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class SSGenotypeCall
|
* Class GenotypeCall
|
||||||
* <p/>
|
* <p/>
|
||||||
* The mplementation of the genotype interface, which contains
|
* The implementation of the genotype interface, which contains
|
||||||
* extra information for the various genotype outputs
|
* extra information for the various genotype outputs
|
||||||
*/
|
*/
|
||||||
public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
|
public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
|
||||||
|
|
@ -47,7 +47,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
|
||||||
*/
|
*/
|
||||||
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||||
mSampleName = sampleName;
|
mSampleName = sampleName;
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = Character.toUpperCase(refBase);
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
mPileup = pileup;
|
mPileup = pileup;
|
||||||
|
|
@ -64,7 +64,7 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
|
||||||
*/
|
*/
|
||||||
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||||
mSampleName = sampleName;
|
mSampleName = sampleName;
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = Character.toUpperCase(refBase);
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
mGenotype = genotype;
|
mGenotype = genotype;
|
||||||
|
|
@ -209,6 +209,15 @@ public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Like
|
||||||
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* are we a variant? (non-ref)
|
||||||
|
*
|
||||||
|
* @return true if we're a variant
|
||||||
|
*/
|
||||||
|
public boolean isVariant() {
|
||||||
|
return isVariant(mRefBase);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return this genotype as a variant
|
* return this genotype as a variant
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
pNull += p0;
|
pNull += p0;
|
||||||
logger.debug("Final pD=" + pD + ", pNull=" + pNull);
|
logger.debug("Final pD=" + pD + ", pNull=" + pNull);
|
||||||
|
|
||||||
return new EMOutput(pD, pNull, pF, GLs);
|
return new EMOutput(pD, pNull, pF, MAF, GLs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private double compute_pD(HashMap<String, GenotypeLikelihoods> GLs) {
|
private double compute_pD(HashMap<String, GenotypeLikelihoods> GLs) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue