1. The joint estimation model now constrains genotypes to be AA,AB,or BB only (i.e. to use a single alternate allele). Note that this doesn't work for the old models (point estimate or SSG) because calculations aren't divided by alternate allele.

2. Allele frequency spectrum is not emitted for single samples (since it doesn't make sense).
3. If in pooled mode, throw an exception of pool size isn't set appropriately.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2072 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-18 17:43:15 +00:00
parent 405c6bf2c1
commit 0a35c8e0ba
7 changed files with 129 additions and 34 deletions

View File

@ -68,7 +68,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
return PofDgivenAFi;
}
protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
ArrayList<Genotype> calls = new ArrayList<Genotype>();
for ( String sample : GLs.keySet() ) {
@ -89,6 +89,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
}
if ( call instanceof AlleleConstrainedGenotype ) {
((AlleleConstrainedGenotype)call).setAlternateAllele(alt);
}
calls.add(call);
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import java.io.PrintStream;
import java.io.PrintWriter;
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
@ -217,7 +216,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println();
}
protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
// by default, we return no genotypes
return new ArrayList<Genotype>();
}
@ -246,7 +245,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
// populate the sample-specific data
List<Genotype> calls = makeGenotypeCalls(ref, contexts, loc);
List<Genotype> calls = makeGenotypeCalls(ref, baseOfMax, contexts, loc);
// next, the general locus data
// note that calculating strand bias involves overwriting data structures, so we do that last
@ -257,8 +256,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE);
((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range);
// frequenc range doesn't make sense for single samples
if ( getNSamples(contexts) > 1 ) {
AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE);
((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range);
}
}
if ( locusdata instanceof IDBacked ) {
rodDbSNP dbsnp = getDbSNP(tracker);

View File

@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
import org.broadinstitute.sting.utils.genotype.*;
@ -95,14 +94,17 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
// deal with input errors
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
throw new StingException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
}
if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
}
if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) {
StringBuilder sb = new StringBuilder();
sb.append("\n***\tThe --lod_threshold argument is no longer supported; instead, please use --min_confidence_threshold.");
sb.append("\n***\tThere is approximately a 10-to-1 mapping from confidence to LOD.");
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
throw new StingException(sb.toString());
throw new IllegalArgumentException(sb.toString());
}
// get all of the unique sample names

View File

@ -0,0 +1,54 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* <p/>
* Class AlleleConstrainedGenotype
* <p/>
* A genotype that can have only one of 3 genotypes AA,AB,BB
*/
public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsBacked {
protected final static char NO_CONSTRAINT = 'N';
private char ref = NO_CONSTRAINT;
private char alt = NO_CONSTRAINT;
public AlleleConstrainedGenotype(char ref) {
this.ref = ref;
}
/**
* set the allowed alternate allele
*
* @param alt the alternate allele
*/
public void setAlternateAllele(char alt) {
this.alt = alt;
}
/**
* @return returns the allowed alternate allele
*/
public char getAlternateAllele() {
return alt;
}
/**
* @return returns the best genotype
*/
protected abstract DiploidGenotype getBestGenotype();
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
public String getBases() {
DiploidGenotype g = getBestGenotype();
if ( alt != NO_CONSTRAINT && ((g.base1 != ref && g.base1 != alt) || (g.base2 != ref && g.base2 != alt)) )
throw new IllegalStateException("The best genotype " + g + " is composed of an allele that is not " + ref + " or " + alt);
return g.toString();
}
}

View File

@ -13,7 +13,7 @@ import java.util.Arrays;
* <p/>
* The implementation of the genotype interface, specific to Geli
*/
public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked {
public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, PosteriorsBacked {
private final char mRefBase;
private final GenomeLoc mLocation;
@ -33,6 +33,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
* @param loc the genome loc
*/
public GeliGenotypeCall(char ref, GenomeLoc loc) {
super(ref);
mRefBase = ref;
mLocation = loc;
@ -42,6 +43,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
}
public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) {
super(ref);
mRefBase = ref;
mLocation = loc;
mBestGenotype = DiploidGenotype.valueOf(genotype);
@ -99,10 +101,30 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
private void lazyEval() {
if (mBestGenotype == null) {
Integer sorted[] = Utils.SortPermutation(mPosteriors);
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference());
char ref = this.getReference();
char alt = this.getAlternateAllele();
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
// if we are constrained to a single alternate allele, use only that one
if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) {
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()];
boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
if ( hetOverHom ) {
mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype);
mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype));
} else {
mBestGenotype = (homOverRef ? homGenotype : mRefGenotype);
mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype));
}
} else {
Integer sorted[] = Utils.SortPermutation(mPosteriors);
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
}
}
@ -117,7 +139,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
}
// get the best genotype
private DiploidGenotype getBestGenotype() {
protected DiploidGenotype getBestGenotype() {
lazyEval();
return mBestGenotype;
}

View File

@ -13,7 +13,7 @@ import java.util.Arrays;
* <p/>
* The implementation of the genotype interface, specific to VCF
*/
public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, SampleBacked {
public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, SampleBacked {
private final char mRefBase;
private final GenomeLoc mLocation;
@ -31,6 +31,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
private String mSampleName;
public VCFGenotypeCall(char ref, GenomeLoc loc) {
super(ref);
mRefBase = ref;
mLocation = loc;
@ -41,6 +42,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
}
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) {
super(ref);
mRefBase = ref;
mLocation = loc;
mBestGenotype = DiploidGenotype.unorderedValueOf(genotype);
@ -103,14 +105,33 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
private void lazyEval() {
if (mBestGenotype == null) {
Integer sorted[] = Utils.SortPermutation(mPosteriors);
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference());
char ref = this.getReference();
char alt = this.getAlternateAllele();
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
// if we are constrained to a single alternate allele, use only that one
if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) {
DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref));
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()];
boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()];
if ( hetOverHom ) {
mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype);
mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype));
} else {
mBestGenotype = (homOverRef ? homGenotype : mRefGenotype);
mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype));
}
} else {
Integer sorted[] = Utils.SortPermutation(mPosteriors);
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
}
}
}
/**
* get the confidence we have
*
@ -121,7 +142,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
}
// get the best genotype
private DiploidGenotype getBestGenotype() {
protected DiploidGenotype getBestGenotype() {
lazyEval();
return mBestGenotype;
}
@ -138,15 +159,6 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
return mRefGenotype;
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
public String getBases() {
return getBestGenotype().toString();
}
/**
* get the ploidy
*

View File

@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("e4d32d41544fc12c1553f780be3fd272"));
Arrays.asList("3a33873d7284cc113a639ee12c925ac0"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("d0bfdedca0944a0b05d53f1a75408a57"));
Arrays.asList("bcdb54d8867543c5c316b65b27b0d3d6"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("e2e4c147075186245d8eb1f8d3bb8705"));
Arrays.asList("300efc0306ae89e38b67c77175fe828d"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}