Pull the genotype (and genotype quality) calculation out of the VCF code and into the Genotyper.

[Also, enable Mark's new UG arguments]



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2355 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-15 04:29:28 +00:00
parent 2cbc85cc7a
commit 874552ff75
15 changed files with 182 additions and 160 deletions

View File

@ -31,7 +31,6 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// use flat priors for GLs
DiploidGenotypePriors priors = new DiploidGenotypePriors();
int index = 0;
for ( String sample : contexts.keySet() ) {
StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
@ -47,12 +46,11 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
for ( char alt : BaseUtils.BASES ) {
if ( alt != ref ) {
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], index);
AFMatrixMap.get(alt).setLikelihoods(posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()], sample);
}
}
index++;
}
}
@ -74,14 +72,31 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
}
}
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
ArrayList<Genotype> calls = new ArrayList<Genotype>();
// set up some variables we'll need in the loop
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
DiploidGenotype hetGenotype = DiploidGenotype.unorderedValueOf(ref, alt);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
for ( String sample : GLs.keySet() ) {
// create the call
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
// set the genotype and confidence
Pair<Integer, Double> AFbasedGenotype = matrix.getGenotype(frequency, sample);
call.setNegLog10PError(AFbasedGenotype.second);
if ( AFbasedGenotype.first == GenotypeType.REF.ordinal() )
call.setGenotype(refGenotype);
else if ( AFbasedGenotype.first == GenotypeType.HET.ordinal() )
call.setGenotype(hetGenotype);
else // ( AFbasedGenotype.first == GenotypeType.HOM.ordinal() )
call.setGenotype(homGenotype);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup);
@ -105,12 +120,17 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
return calls;
}
protected class AlleleFrequencyMatrix {
private double[][] matrix;
private int[] indexes;
private int N;
private int frequency;
private double[][] matrix; // allele frequency matrix
private int[] indexes; // matrix to maintain which genotype is active
private int N; // total frequencies
private int frequency; // current frequency
// data structures necessary to maintain a list of the best genotypes and their scores
private ArrayList<String> samples = new ArrayList<String>();
private HashMap<Integer, HashMap<String, Pair<Integer, Double>>> samplesToGenotypesPerAF = new HashMap<Integer, HashMap<String, Pair<Integer, Double>>>();
public AlleleFrequencyMatrix(int N) {
this.N = N;
@ -121,7 +141,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
indexes[i] = 0;
}
public void setLikelihoods(double AA, double AB, double BB, int index) {
public void setLikelihoods(double AA, double AB, double BB, String sample) {
int index = samples.size();
samples.add(sample);
matrix[index][GenotypeType.REF.ordinal()] = AA;
matrix[index][GenotypeType.HET.ordinal()] = AB;
matrix[index][GenotypeType.HOM.ordinal()] = BB;
@ -169,17 +191,48 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
for (int i = 0; i < N; i++)
likelihoods += matrix[i][indexes[i]];
//verboseWriter.write(frequency + "\n");
//for (int i = 0; i < N; i++) {
// for (int j=0; j < 3; j++) {
// verboseWriter.write(String.valueOf(matrix[i][j]));
// verboseWriter.write(indexes[i] == j ? "* " : " ");
// }
// verboseWriter.write("\n");
//}
//verboseWriter.write(likelihoods + "\n\n");
/*
System.out.println(frequency);
for (int i = 0; i < N; i++) {
for (int j=0; j < 3; j++) {
System.out.print(String.valueOf(matrix[i][j]));
System.out.print(indexes[i] == j ? "* " : " ");
}
System.out.println();
}
System.out.println(likelihoods);
System.out.println();
*/
recordGenotypes();
return likelihoods;
}
public Pair<Integer, Double> getGenotype(int frequency, String sample) {
return samplesToGenotypesPerAF.get(frequency).get(sample);
}
private void recordGenotypes() {
HashMap<String, Pair<Integer, Double>> samplesToGenotypes = new HashMap<String, Pair<Integer, Double>>();
int index = 0;
for ( String sample : samples ) {
int genotype = indexes[index];
double score;
if ( genotype == GenotypeType.REF.ordinal() )
score = matrix[index][GenotypeType.REF.ordinal()] - Math.max(matrix[index][GenotypeType.HET.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]);
else if ( genotype == GenotypeType.HET.ordinal() )
score = matrix[index][GenotypeType.HET.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HOM.ordinal()]);
else // ( genotype == GenotypeType.HOM.ordinal() )
score = matrix[index][GenotypeType.HOM.ordinal()] - Math.max(matrix[index][GenotypeType.REF.ordinal()], matrix[index][GenotypeType.HET.ordinal()]);
samplesToGenotypes.put(sample, new Pair<Integer, Double>(genotype, Math.abs(score)));
index++;
}
samplesToGenotypesPerAF.put(frequency, samplesToGenotypes);
}
}
}

View File

@ -96,6 +96,14 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
// set the genotype and confidence
double[] posteriors = GLs.get(sample).getPosteriors();
Integer sorted[] = Utils.SortPermutation(posteriors);
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]);
call.setGenotype(bestGenotype);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup);
@ -107,7 +115,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
}
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
((PosteriorsBacked)call).setPosteriors(posteriors);
}
calls.add(call);

View File

@ -299,7 +299,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println();
}
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
// by default, we return no genotypes
return new ArrayList<Genotype>();
}
@ -329,7 +329,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return new Pair<VariationCall, List<Genotype>>(null, null);
// populate the sample-specific data
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, contexts, loc);
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
// next, the general locus data
// *** note that calculating strand bias involves overwriting data structures, so we do that last

View File

@ -43,9 +43,9 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
// find the index of the best genotype
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
int bestIndex = sortedPosteriors[sortedPosteriors.length - 1];
double[] normPosteriors = discoveryGL.second.getNormalizedPosteriors();
Integer sortedNormPosteriors[] = Utils.SortPermutation(normPosteriors);
int bestIndex = sortedNormPosteriors[sortedNormPosteriors.length - 1];
// flag to determine if ref is the best call (not necessary in genotype mode)
boolean bestIsRef = false;
@ -53,11 +53,11 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// calculate the phred-scaled confidence score
double phredScaledConfidence;
if ( GENOTYPE_MODE ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - posteriors[bestIndex]);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - normPosteriors[bestIndex]);
} else {
int refIndex = DiploidGenotype.createHomGenotype(ref).ordinal();
bestIsRef = (refIndex == bestIndex);
double pError = (bestIsRef ? 1.0 - posteriors[refIndex] : posteriors[refIndex]);
double pError = (bestIsRef ? 1.0 - normPosteriors[refIndex] : normPosteriors[refIndex]);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(pError);
}
@ -68,6 +68,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// we can now create the genotype call object
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
// set the genotype and confidence
double[] posteriors = discoveryGL.second.getPosteriors();
Integer sorted[] = Utils.SortPermutation(posteriors);
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
DiploidGenotype nextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
call.setNegLog10PError(posteriors[bestGenotype.ordinal()] - posteriors[nextGenotype.ordinal()]);
call.setGenotype(bestGenotype);
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setPileup(discoveryGL.first);
}
@ -78,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
((LikelihoodsBacked)call).setLikelihoods(discoveryGL.second.getLikelihoods());
}
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
((PosteriorsBacked)call).setPosteriors(posteriors);
}
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);

View File

@ -88,10 +88,9 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
// Mark's filtering arguments
//@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public Integer MIN_BASE_QUALTY_SCORE = -1;
//@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public Integer MIN_MAPPING_QUALTY_SCORE = -1;
}

View File

@ -8,7 +8,7 @@ package org.broadinstitute.sting.utils.genotype;
* <p/>
* A genotype that can have only one of 3 genotypes AA,AB,BB
*/
public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsBacked {
public abstract class AlleleConstrainedGenotype implements Genotype {
protected final static char NO_CONSTRAINT = 'N';
@ -36,6 +36,7 @@ public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsB
}
/**
*
* @return returns the best genotype
*/
protected abstract DiploidGenotype getBestGenotype();

View File

@ -44,13 +44,7 @@ public enum DiploidGenotype {
}
public boolean isHet() {
switch (this) {
case AA:
case CC:
case GG:
case TT: return false;
default: return true;
}
return base1 != base2;
}
/**
@ -71,14 +65,16 @@ public enum DiploidGenotype {
/**
* get the genotype, given a string of 2 chars which may not necessarily be ordered correctly
* @param genotype the string representation
* @param base1 base1
* @param base2 base2
* @return the diploid genotype
*/
public static DiploidGenotype unorderedValueOf(String genotype) {
if ( genotype == null || genotype.length() != 2 )
throw new IllegalArgumentException("Diploid genotypes are represented by 2 characters");
if ( genotype.charAt(0) > genotype.charAt(1) )
genotype = String.format("%c%c", genotype.charAt(1), genotype.charAt(0));
return valueOf(genotype);
public static DiploidGenotype unorderedValueOf(char base1, char base2) {
if ( base1 > base2 ) {
char temp = base1;
base1 = base2;
base2 = temp;
}
return valueOf(String.format("%c%c", base1, base2));
}
}

View File

@ -12,8 +12,20 @@ public interface GenotypeCall extends Genotype {
/**
*
* @param variation the Variation object associated with this genotype
* @param variation the Variation object associated with this genotype
*/
public void setVariation(Variation variation);
/**
*
* @param genotype the genotype
*/
public void setGenotype(DiploidGenotype genotype);
/**
*
* @param value -log10PEror
*/
public void setNegLog10PError(double value);
}

View File

@ -4,8 +4,8 @@ package org.broadinstitute.sting.utils.genotype;
* @author aaron
* Interface PosteriorsBacked
*
* this interface indicates that the genotype is
* backed up by genotype posterior information.
* This interface indicates that the genotype is backed up by *diploid* genotype posterior information.
* The posteriors array is expected to have 10 entries (one for each of the possible diploid genotypes).
*/
public interface PosteriorsBacked {

View File

@ -82,6 +82,14 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
this.mVariation = variation;
}
public void setGenotype(DiploidGenotype genotype) {
; // do nothing: geli uses diploid posteriors to calculate the genotype
}
public void setNegLog10PError(double value) {
; // do nothing: geli uses diploid posteriors to calculate the P(error)
}
@Override
public boolean equals(Object other) {
lazyEval();

View File

@ -65,6 +65,10 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
mGenotype = genotype;
}
public void setGenotype(DiploidGenotype genotype) {
setGenotype(genotype.toString());
}
@Override
public boolean equals(Object other) {
if (other == null || !(other instanceof GLFGenotypeCall))

View File

@ -4,8 +4,6 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.Arrays;
/**
* @author ebanks
@ -20,14 +18,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
private ReadBackedPileup mPileup = null;
private int mCoverage = 0;
private double[] mPosteriors;
private double mNegLog10PError = -1;
private Variation mVariation = null;
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
private DiploidGenotype mRefGenotype = null;
private DiploidGenotype mBestGenotype = null;
private DiploidGenotype mNextGenotype = null;
// the best genotype
private DiploidGenotype mGenotype = null;
// the sample name, used to propulate the SampleBacked interface
private String mSampleName;
@ -38,44 +34,30 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
mLocation = loc;
// fill in empty data
mPosteriors = new double[10];
Arrays.fill(mPosteriors, Double.MIN_VALUE);
mGenotype = DiploidGenotype.createHomGenotype(ref);
mSampleName = "";
}
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) {
public VCFGenotypeCall(char ref, GenomeLoc loc, DiploidGenotype genotype, double negLog10PError, int coverage, String sample) {
super(ref);
mRefBase = ref;
mLocation = loc;
mBestGenotype = DiploidGenotype.unorderedValueOf(genotype);
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
mSampleName = sample;
mGenotype = genotype;
mNegLog10PError = negLog10PError;
mCoverage = coverage;
// set general posteriors to min double value
mPosteriors = new double[10];
Arrays.fill(mPosteriors, Double.MIN_VALUE);
// set the ref to PError
mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError;
// set the best genotype to zero (need to do this after ref in case ref=best)
mPosteriors[mBestGenotype.ordinal()] = 0.0;
// choose a smart next best genotype and set it to PError
if ( mBestGenotype == mRefGenotype )
mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype));
else
mNextGenotype = mRefGenotype;
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
mSampleName = sample;
}
public void setPileup(ReadBackedPileup pileup) {
mPileup = pileup;
}
public void setPosteriors(double[] posteriors) {
mPosteriors = posteriors;
public void setGenotype(DiploidGenotype genotype) {
mGenotype = genotype;
}
public void setNegLog10PError(double negLog10PError) {
mNegLog10PError = negLog10PError;
}
public void setVariation(Variation variation) {
@ -89,53 +71,21 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
@Override
public boolean equals(Object other) {
lazyEval();
if (other == null)
if ( other == null || !(other instanceof VCFGenotypeCall) )
return false;
if (other instanceof VCFGenotypeCall) {
VCFGenotypeCall otherCall = (VCFGenotypeCall) other;
if (!this.mBestGenotype.equals(otherCall.mBestGenotype))
return false;
return (this.mRefBase == otherCall.mRefBase && this.mLocation.equals(otherCall.mLocation));
}
return false;
VCFGenotypeCall otherCall = (VCFGenotypeCall) other;
return mGenotype.equals(otherCall.mGenotype) &&
mNegLog10PError == otherCall.mNegLog10PError &&
mLocation.equals(otherCall.mLocation) &&
mRefBase == otherCall.mRefBase;
}
public String toString() {
lazyEval();
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
getLocation(), mBestGenotype, mRefGenotype, mRefBase, getReadCount(), getNegLog10PError());
}
private void lazyEval() {
if (mBestGenotype == null) {
char ref = this.getReference();
char alt = this.getAlternateAllele();
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
// if we are constrained to a single alternate allele, use only that one
if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) {
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()];
boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
if ( hetOverHom ) {
mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype);
mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype));
} else {
mBestGenotype = (homOverRef ? homGenotype : mRefGenotype);
mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype));
}
} else {
Integer sorted[] = Utils.SortPermutation(mPosteriors);
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
}
return String.format("%s best=%s ref=%s depth=%d negLog10PError=%.2f",
getLocation(), mGenotype, mRefBase, getReadCount(), getNegLog10PError());
}
/**
@ -144,25 +94,12 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
* @return get the one minus error value
*/
public double getNegLog10PError() {
return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]);
return mNegLog10PError;
}
// get the best genotype
protected DiploidGenotype getBestGenotype() {
lazyEval();
return mBestGenotype;
}
// get the alternate genotype
private DiploidGenotype getNextBest() {
lazyEval();
return mNextGenotype;
}
// get the ref genotype
private DiploidGenotype getRefGenotype() {
lazyEval();
return mRefGenotype;
return mGenotype;
}
/**
@ -238,11 +175,14 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
public Variation toVariation(char ref) {
if ( mVariation == null ) {
VCFVariationCall var = new VCFVariationCall(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE);
double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
var.setConfidence(confidence);
if ( isVariant() )
var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2));
mVariation = var;
var.setConfidence(10 * mNegLog10PError);
if ( !mGenotype.isHomRef(ref) ) {
if ( mGenotype.base1 != ref )
var.addAlternateAllele(Character.toString(mGenotype.base1));
if ( mGenotype.isHet() && mGenotype.base2 != ref )
var.addAlternateAllele(Character.toString(mGenotype.base2));
}
mVariation = var;
}
return mVariation;
}
@ -274,15 +214,6 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
return mRefBase;
}
/**
* get the posteriors
*
* @return the posteriors
*/
public double[] getPosteriors() {
return mPosteriors;
}
/**
* @return returns the sample name for this genotype
*/

View File

@ -42,7 +42,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
private char mReferenceBase;
// our location
private GenomeLoc mLoc;
// our id; set to '.' if not available
// our id
private String mID;
// the alternate bases
private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();

View File

@ -179,6 +179,8 @@ public class VCFUtils {
if ( freqsSeen > 0 )
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
// TODO -- "." and "0" are wrong -- need to use values from the records
return new VCFRecord(params.getReferenceBase(),
params.getContig(),
params.getPosition(),

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("8f9ee611603dc75c7ca8b15d81000a82"));
Arrays.asList("ad00b255c83fe43213758b280646e4b4"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("a8979577debf9c8d7ea4a40ee1a0f1ef"));
Arrays.asList("e709714e288c508f79c1ffc5cdbe9cca"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("253aba7ce0af7be4104dd275873a541d"));
Arrays.asList("2214983d4ae3a19bc16e7cb89fc3128f"));
executeTest("testPooled1", spec);
}
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("1be2ce6ac40bdaf3d8f264f6d73c84ae"));
Arrays.asList("9e98eba56add9989e2d40834efc8c803"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("8f92a56da4bd56b8755e9db89c31bfff"));
Arrays.asList("fe1077636feb41c69f7c4f64147e2bba"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("67913655a8063f4deb9a6138f0844ddd"));
Arrays.asList("6a739a0acb620aeed5d8e0de7b59877a"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testGenotypeModeJoint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
Arrays.asList("d51760ad01195ab299925595c6f62bb4"));
Arrays.asList("85fbcdd816ecc1d9686df244a6e039dd"));
executeTest("testGenotypeMode - Joint Estimate", spec);
}
@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testAllBasesModeJoint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
Arrays.asList("2322d3ebf09ebf901fbcb653119d0b47"));
Arrays.asList("f871576cf49e7a83a0f6b5580b0178c2"));
executeTest("testAllBasesMode - Joint Estimate", spec);
}