Pull the genotype (and genotype quality) calculation out of the VCF code and into the Genotyper.
[Also, enable Mark's new UG arguments] git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2355 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2cbc85cc7a
commit
874552ff75
|
|
@ -31,7 +31,6 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
// use flat priors for GLs
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors();
|
||||
|
||||
int index = 0;
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
StratifiedAlignmentContext context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
|
|
@ -47,12 +46,11 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
for ( char alt : BaseUtils.BASES ) {
|
||||
if ( alt != ref ) {
|
||||
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index);
|
||||
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], sample);
|
||||
}
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -74,14 +72,31 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
}
|
||||
}
|
||||
|
||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
|
||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
|
||||
ArrayList<Genotype> calls = new ArrayList<Genotype>();
|
||||
|
||||
// set up some variables we'll need in the loop
|
||||
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
// create the call
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
|
||||
|
||||
// set the genotype and confidence
|
||||
Pair<Integer, Double> AFbasedGenotype = matrix.getGenotype(frequency, sample);
|
||||
call.setNegLog10PError(AFbasedGenotype.second);
|
||||
if ( AFbasedGenotype.first == GenotypeType.REF.ordinal() )
|
||||
call.setGenotype(refGenotype);
|
||||
else if ( AFbasedGenotype.first == GenotypeType.HET.ordinal() )
|
||||
call.setGenotype(hetGenotype);
|
||||
else // ( AFbasedGenotype.first == GenotypeType.HOM.ordinal() )
|
||||
call.setGenotype(homGenotype);
|
||||
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
|
|
@ -105,12 +120,17 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
return calls;
|
||||
}
|
||||
|
||||
|
||||
protected class AlleleFrequencyMatrix {
|
||||
|
||||
private double[][] matrix;
|
||||
private int[] indexes;
|
||||
private int N;
|
||||
private int frequency;
|
||||
private double[][] matrix; // allele frequency matrix
|
||||
private int[] indexes; // matrix to maintain which genotype is active
|
||||
private int N; // total frequencies
|
||||
private int frequency; // current frequency
|
||||
|
||||
// data structures necessary to maintain a list of the best genotypes and their scores
|
||||
private ArrayList<String> samples = new ArrayList<String>();
|
||||
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
|
||||
|
||||
public AlleleFrequencyMatrix(int N) {
|
||||
this.N = N;
|
||||
|
|
@ -121,7 +141,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
indexes[i] = 0;
|
||||
}
|
||||
|
||||
public void setLikelihoods(double AA, double AB, double BB, int index) {
|
||||
public void setLikelihoods(double AA, double AB, double BB, String sample) {
|
||||
int index = samples.size();
|
||||
samples.add(sample);
|
||||
matrix[index][GenotypeType.REF.ordinal()] = AA;
|
||||
matrix[index][GenotypeType.HET.ordinal()] = AB;
|
||||
matrix[index][GenotypeType.HOM.ordinal()] = BB;
|
||||
|
|
@ -169,17 +191,48 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
for (int i = 0; i < N; i++)
|
||||
likelihoods += matrix[i][indexes[i]];
|
||||
|
||||
//verboseWriter.write(frequency + "\n");
|
||||
//for (int i = 0; i < N; i++) {
|
||||
// for (int j=0; j < 3; j++) {
|
||||
// verboseWriter.write(String.valueOf(matrix[i][j]));
|
||||
// verboseWriter.write(indexes[i] == j ? "* " : " ");
|
||||
// }
|
||||
// verboseWriter.write("\n");
|
||||
//}
|
||||
//verboseWriter.write(likelihoods + "\n\n");
|
||||
/*
|
||||
System.out.println(frequency);
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j=0; j < 3; j++) {
|
||||
System.out.print(String.valueOf(matrix[i][j]));
|
||||
System.out.print(indexes[i] == j ? "* " : " ");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
System.out.println(likelihoods);
|
||||
System.out.println();
|
||||
*/
|
||||
|
||||
recordGenotypes();
|
||||
|
||||
return likelihoods;
|
||||
}
|
||||
|
||||
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
|
||||
return samplesToGenotypesPerAF.get(frequency).get(sample);
|
||||
}
|
||||
|
||||
private void recordGenotypes() {
|
||||
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
|
||||
|
||||
int index = 0;
|
||||
for ( String sample : samples ) {
|
||||
int genotype = indexes[index];
|
||||
|
||||
double score;
|
||||
if ( genotype == GenotypeType.REF.ordinal() )
|
||||
score = matrix[index][GenotypeType.REF.ordinal()] - Math.max(matrix[index][GenotypeType.HET.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]);
|
||||
else if ( genotype == GenotypeType.HET.ordinal() )
|
||||
score = matrix[index][GenotypeType.HET.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]);
|
||||
else // ( genotype == GenotypeType.HOM.ordinal() )
|
||||
score = matrix[index][GenotypeType.HOM.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HET.ordinal()]);
|
||||
|
||||
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
|
||||
index++;
|
||||
}
|
||||
|
||||
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -96,6 +96,14 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
// set the genotype and confidence
|
||||
double[] posteriors = GLs.get(sample).getPosteriors();
|
||||
Integer sorted[] = Utils.SortPermutation(posteriors);
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]);
|
||||
call.setGenotype(bestGenotype);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
|
|
@ -107,7 +115,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
|
||||
((PosteriorsBacked)call).setPosteriors(posteriors);
|
||||
}
|
||||
|
||||
calls.add(call);
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
verboseWriter.println();
|
||||
}
|
||||
|
||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
|
||||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
|
||||
// by default, we return no genotypes
|
||||
return new ArrayList<Genotype>();
|
||||
}
|
||||
|
|
@ -329,7 +329,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// populate the sample-specific data
|
||||
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, contexts, loc);
|
||||
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
|
||||
|
||||
// next, the general locus data
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
|
|
|
|||
|
|
@ -43,9 +43,9 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
|
||||
// find the index of the best genotype
|
||||
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
|
||||
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
|
||||
int bestIndex = sortedPosteriors[sortedPosteriors.length - 1];
|
||||
double[] normPosteriors = discoveryGL.second.getNormalizedPosteriors();
|
||||
Integer sortedNormPosteriors[] = Utils.SortPermutation(normPosteriors);
|
||||
int bestIndex = sortedNormPosteriors[sortedNormPosteriors.length - 1];
|
||||
|
||||
// flag to determine if ref is the best call (not necessary in genotype mode)
|
||||
boolean bestIsRef = false;
|
||||
|
|
@ -53,11 +53,11 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
// calculate the phred-scaled confidence score
|
||||
double phredScaledConfidence;
|
||||
if ( GENOTYPE_MODE ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - posteriors[bestIndex]);
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - normPosteriors[bestIndex]);
|
||||
} else {
|
||||
int refIndex = DiploidGenotype.createHomGenotype(ref).ordinal();
|
||||
bestIsRef = (refIndex == bestIndex);
|
||||
double pError = (bestIsRef ? 1.0 - posteriors[refIndex] : posteriors[refIndex]);
|
||||
double pError = (bestIsRef ? 1.0 - normPosteriors[refIndex] : normPosteriors[refIndex]);
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(pError);
|
||||
}
|
||||
|
||||
|
|
@ -68,6 +68,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
// we can now create the genotype call object
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
|
||||
|
||||
// set the genotype and confidence
|
||||
double[] posteriors = discoveryGL.second.getPosteriors();
|
||||
Integer sorted[] = Utils.SortPermutation(posteriors);
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]);
|
||||
call.setGenotype(bestGenotype);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setPileup(discoveryGL.first);
|
||||
}
|
||||
|
|
@ -78,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
((LikelihoodsBacked)call).setLikelihoods(discoveryGL.second.getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
|
||||
((PosteriorsBacked)call).setPosteriors(posteriors);
|
||||
}
|
||||
|
||||
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
|
||||
|
|
|
|||
|
|
@ -88,10 +88,9 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
|
||||
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
|
||||
|
||||
// Mark's filtering arguments
|
||||
//@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public Integer MIN_BASE_QUALTY_SCORE = -1;
|
||||
|
||||
//@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public Integer MIN_MAPPING_QUALTY_SCORE = -1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
* <p/>
|
||||
* A genotype that can have only one of 3 genotypes AA,AB,BB
|
||||
*/
|
||||
public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsBacked {
|
||||
public abstract class AlleleConstrainedGenotype implements Genotype {
|
||||
|
||||
protected final static char NO_CONSTRAINT = 'N';
|
||||
|
||||
|
|
@ -36,6 +36,7 @@ public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsB
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return returns the best genotype
|
||||
*/
|
||||
protected abstract DiploidGenotype getBestGenotype();
|
||||
|
|
|
|||
|
|
@ -44,13 +44,7 @@ public enum DiploidGenotype {
|
|||
}
|
||||
|
||||
public boolean isHet() {
|
||||
switch (this) {
|
||||
case AA:
|
||||
case CC:
|
||||
case GG:
|
||||
case TT: return false;
|
||||
default: return true;
|
||||
}
|
||||
return base1 != base2;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -71,14 +65,16 @@ public enum DiploidGenotype {
|
|||
|
||||
/**
|
||||
* get the genotype, given a string of 2 chars which may not necessarily be ordered correctly
|
||||
* @param genotype the string representation
|
||||
* @param base1 base1
|
||||
* @param base2 base2
|
||||
* @return the diploid genotype
|
||||
*/
|
||||
public static DiploidGenotype unorderedValueOf(String genotype) {
|
||||
if ( genotype == null || genotype.length() != 2 )
|
||||
throw new IllegalArgumentException("Diploid genotypes are represented by 2 characters");
|
||||
if ( genotype.charAt(0) > genotype.charAt(1) )
|
||||
genotype = String.format("%c%c", genotype.charAt(1), genotype.charAt(0));
|
||||
return valueOf(genotype);
|
||||
public static DiploidGenotype unorderedValueOf(char base1, char base2) {
|
||||
if ( base1 > base2 ) {
|
||||
char temp = base1;
|
||||
base1 = base2;
|
||||
base2 = temp;
|
||||
}
|
||||
return valueOf(String.format("%c%c", base1, base2));
|
||||
}
|
||||
}
|
||||
|
|
@ -12,8 +12,20 @@ public interface GenotypeCall extends Genotype {
|
|||
|
||||
/**
|
||||
*
|
||||
* @param variation the Variation object associated with this genotype
|
||||
* @param variation the Variation object associated with this genotype
|
||||
*/
|
||||
public void setVariation(Variation variation);
|
||||
|
||||
/**
|
||||
*
|
||||
* @param genotype the genotype
|
||||
*/
|
||||
public void setGenotype(DiploidGenotype genotype);
|
||||
|
||||
/**
|
||||
*
|
||||
* @param value -log10PEror
|
||||
*/
|
||||
public void setNegLog10PError(double value);
|
||||
|
||||
}
|
||||
|
|
@ -4,8 +4,8 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
* @author aaron
|
||||
* Interface PosteriorsBacked
|
||||
*
|
||||
* this interface indicates that the genotype is
|
||||
* backed up by genotype posterior information.
|
||||
* This interface indicates that the genotype is backed up by *diploid* genotype posterior information.
|
||||
* The posteriors array is expected to have 10 entries (one for each of the possible diploid genotypes).
|
||||
*/
|
||||
public interface PosteriorsBacked {
|
||||
|
||||
|
|
|
|||
|
|
@ -82,6 +82,14 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
|
|||
this.mVariation = variation;
|
||||
}
|
||||
|
||||
public void setGenotype(DiploidGenotype genotype) {
|
||||
; // do nothing: geli uses diploid posteriors to calculate the genotype
|
||||
}
|
||||
|
||||
public void setNegLog10PError(double value) {
|
||||
; // do nothing: geli uses diploid posteriors to calculate the P(error)
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
|
|
|||
|
|
@ -65,6 +65,10 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
|
|||
mGenotype = genotype;
|
||||
}
|
||||
|
||||
public void setGenotype(DiploidGenotype genotype) {
|
||||
setGenotype(genotype.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null || !(other instanceof GLFGenotypeCall))
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -20,14 +18,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
|
||||
private ReadBackedPileup mPileup = null;
|
||||
private int mCoverage = 0;
|
||||
private double[] mPosteriors;
|
||||
private double mNegLog10PError = -1;
|
||||
|
||||
private Variation mVariation = null;
|
||||
|
||||
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
|
||||
private DiploidGenotype mRefGenotype = null;
|
||||
private DiploidGenotype mBestGenotype = null;
|
||||
private DiploidGenotype mNextGenotype = null;
|
||||
// the best genotype
|
||||
private DiploidGenotype mGenotype = null;
|
||||
|
||||
// the sample name, used to propulate the SampleBacked interface
|
||||
private String mSampleName;
|
||||
|
|
@ -38,44 +34,30 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
mLocation = loc;
|
||||
|
||||
// fill in empty data
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
mGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mSampleName = "";
|
||||
}
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) {
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
|
||||
super(ref);
|
||||
mRefBase = ref;
|
||||
mLocation = loc;
|
||||
mBestGenotype = DiploidGenotype.unorderedValueOf(genotype);
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mSampleName = sample;
|
||||
mGenotype = genotype;
|
||||
mNegLog10PError = negLog10PError;
|
||||
mCoverage = coverage;
|
||||
|
||||
// set general posteriors to min double value
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
|
||||
// set the ref to PError
|
||||
mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
// set the best genotype to zero (need to do this after ref in case ref=best)
|
||||
mPosteriors[mBestGenotype.ordinal()] = 0.0;
|
||||
|
||||
// choose a smart next best genotype and set it to PError
|
||||
if ( mBestGenotype == mRefGenotype )
|
||||
mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype));
|
||||
else
|
||||
mNextGenotype = mRefGenotype;
|
||||
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
mSampleName = sample;
|
||||
}
|
||||
|
||||
public void setPileup(ReadBackedPileup pileup) {
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
public void setPosteriors(double[] posteriors) {
|
||||
mPosteriors = posteriors;
|
||||
public void setGenotype(DiploidGenotype genotype) {
|
||||
mGenotype = genotype;
|
||||
}
|
||||
|
||||
public void setNegLog10PError(double negLog10PError) {
|
||||
mNegLog10PError = negLog10PError;
|
||||
}
|
||||
|
||||
public void setVariation(Variation variation) {
|
||||
|
|
@ -89,53 +71,21 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
||||
if (other == null)
|
||||
if ( other == null || !(other instanceof VCFGenotypeCall) )
|
||||
return false;
|
||||
if (other instanceof VCFGenotypeCall) {
|
||||
VCFGenotypeCall otherCall = (VCFGenotypeCall) other;
|
||||
|
||||
if (!this.mBestGenotype.equals(otherCall.mBestGenotype))
|
||||
return false;
|
||||
return (this.mRefBase == otherCall.mRefBase && this.mLocation.equals(otherCall.mLocation));
|
||||
}
|
||||
return false;
|
||||
VCFGenotypeCall otherCall = (VCFGenotypeCall) other;
|
||||
|
||||
return mGenotype.equals(otherCall.mGenotype) &&
|
||||
mNegLog10PError == otherCall.mNegLog10PError &&
|
||||
mLocation.equals(otherCall.mLocation) &&
|
||||
mRefBase == otherCall.mRefBase;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
if (mBestGenotype == null) {
|
||||
char ref = this.getReference();
|
||||
char alt = this.getAlternateAllele();
|
||||
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
|
||||
// if we are constrained to a single alternate allele, use only that one
|
||||
if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) {
|
||||
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()];
|
||||
boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
|
||||
boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
|
||||
if ( hetOverHom ) {
|
||||
mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype);
|
||||
mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype));
|
||||
} else {
|
||||
mBestGenotype = (homOverRef ? homGenotype : mRefGenotype);
|
||||
mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype));
|
||||
}
|
||||
} else {
|
||||
Integer sorted[] = Utils.SortPermutation(mPosteriors);
|
||||
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
}
|
||||
return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -144,25 +94,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
* @return get the one minus error value
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]);
|
||||
return mNegLog10PError;
|
||||
}
|
||||
|
||||
// get the best genotype
|
||||
protected DiploidGenotype getBestGenotype() {
|
||||
lazyEval();
|
||||
return mBestGenotype;
|
||||
}
|
||||
|
||||
// get the alternate genotype
|
||||
private DiploidGenotype getNextBest() {
|
||||
lazyEval();
|
||||
return mNextGenotype;
|
||||
}
|
||||
|
||||
// get the ref genotype
|
||||
private DiploidGenotype getRefGenotype() {
|
||||
lazyEval();
|
||||
return mRefGenotype;
|
||||
return mGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -238,11 +175,14 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
public Variation toVariation(char ref) {
|
||||
if ( mVariation == null ) {
|
||||
VCFVariationCall var = new VCFVariationCall(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE);
|
||||
double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
|
||||
var.setConfidence(confidence);
|
||||
if ( isVariant() )
|
||||
var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2));
|
||||
mVariation = var;
|
||||
var.setConfidence(10 * mNegLog10PError);
|
||||
if ( !mGenotype.isHomRef(ref) ) {
|
||||
if ( mGenotype.base1 != ref )
|
||||
var.addAlternateAllele(Character.toString(mGenotype.base1));
|
||||
if ( mGenotype.isHet() && mGenotype.base2 != ref )
|
||||
var.addAlternateAllele(Character.toString(mGenotype.base2));
|
||||
}
|
||||
mVariation = var;
|
||||
}
|
||||
return mVariation;
|
||||
}
|
||||
|
|
@ -274,15 +214,6 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
|
|||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posteriors
|
||||
*
|
||||
* @return the posteriors
|
||||
*/
|
||||
public double[] getPosteriors() {
|
||||
return mPosteriors;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns the sample name for this genotype
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
|
|||
private char mReferenceBase;
|
||||
// our location
|
||||
private GenomeLoc mLoc;
|
||||
// our id; set to '.' if not available
|
||||
// our id
|
||||
private String mID;
|
||||
// the alternate bases
|
||||
private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
|
|
|||
|
|
@ -179,6 +179,8 @@ public class VCFUtils {
|
|||
if ( freqsSeen > 0 )
|
||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
||||
|
||||
// TODO -- "." and "0" are wrong -- need to use values from the records
|
||||
|
||||
return new VCFRecord(params.getReferenceBase(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("8f9ee611603dc75c7ca8b15d81000a82"));
|
||||
Arrays.asList("ad00b255c83fe43213758b280646e4b4"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("a8979577debf9c8d7ea4a40ee1a0f1ef"));
|
||||
Arrays.asList("e709714e288c508f79c1ffc5cdbe9cca"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("253aba7ce0af7be4104dd275873a541d"));
|
||||
Arrays.asList("2214983d4ae3a19bc16e7cb89fc3128f"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("1be2ce6ac40bdaf3d8f264f6d73c84ae"));
|
||||
Arrays.asList("9e98eba56add9989e2d40834efc8c803"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("8f92a56da4bd56b8755e9db89c31bfff"));
|
||||
Arrays.asList("fe1077636feb41c69f7c4f64147e2bba"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("67913655a8063f4deb9a6138f0844ddd"));
|
||||
Arrays.asList("6a739a0acb620aeed5d8e0de7b59877a"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("d51760ad01195ab299925595c6f62bb4"));
|
||||
Arrays.asList("85fbcdd816ecc1d9686df244a6e039dd"));
|
||||
executeTest("testGenotypeMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testAllBasesModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("2322d3ebf09ebf901fbcb653119d0b47"));
|
||||
Arrays.asList("f871576cf49e7a83a0f6b5580b0178c2"));
|
||||
executeTest("testAllBasesMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue