diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 4f46c58fe..b071960af 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -68,7 +68,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul return PofDgivenAFi; } - protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { ArrayList calls = new ArrayList(); for ( String sample : GLs.keySet() ) { @@ -89,6 +89,9 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul if ( call instanceof PosteriorsBacked ) { ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); } + if ( call instanceof AlleleConstrainedGenotype ) { + ((AlleleConstrainedGenotype)call).setAlternateAllele(alt); + } calls.add(call); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index dd1d6039c..ee163dce7 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; -import java.io.PrintStream; import java.io.PrintWriter; public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { @@ -217,7 +216,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println(); } - protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { // by default, we return no genotypes return new ArrayList(); } @@ -246,7 +245,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return new Pair, GenotypeLocusData>(null, null); // populate the sample-specific data - List calls = makeGenotypeCalls(ref, contexts, loc); + List calls = makeGenotypeCalls(ref, baseOfMax, contexts, loc); // next, the general locus data // note that calculating strand bias involves overwriting data structures, so we do that last @@ -257,8 +256,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc } if ( locusdata instanceof AlleleFrequencyBacked ) { ((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); - AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE); - ((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range); + // frequenc range doesn't make sense for single samples + if ( getNSamples(contexts) > 1 ) { + AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE); + ((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range); + } } if ( locusdata instanceof IDBacked ) { rodDbSNP dbsnp = getDbSNP(tracker); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index f8ba7d20a..b428591b3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; import org.broadinstitute.sting.utils.genotype.*; @@ -95,14 +94,17 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL // deal with input errors if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) { - throw new StingException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED."); + throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED."); + } + if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) { + throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value."); } if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) { StringBuilder sb = new StringBuilder(); sb.append("\n***\tThe --lod_threshold argument is no longer supported; instead, please use --min_confidence_threshold."); sb.append("\n***\tThere is approximately a 10-to-1 mapping from confidence to LOD."); sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff"); - throw new StingException(sb.toString()); + throw new IllegalArgumentException(sb.toString()); } // get all of the unique sample names diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java new file mode 100755 index 000000000..72adbbcbb --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleConstrainedGenotype.java @@ -0,0 +1,54 @@ +package org.broadinstitute.sting.utils.genotype; + + +/** + * @author ebanks + *

+ * Class AlleleConstrainedGenotype + *

+ * A genotype that can have only one of 3 genotypes AA,AB,BB + */ +public abstract class AlleleConstrainedGenotype implements Genotype, PosteriorsBacked { + + protected final static char NO_CONSTRAINT = 'N'; + + private char ref = NO_CONSTRAINT; + private char alt = NO_CONSTRAINT; + + public AlleleConstrainedGenotype(char ref) { + this.ref = ref; + } + + /** + * set the allowed alternate allele + * + * @param alt the alternate allele + */ + public void setAlternateAllele(char alt) { + this.alt = alt; + } + + /** + * @return returns the allowed alternate allele + */ + public char getAlternateAllele() { + return alt; + } + + /** + * @return returns the best genotype + */ + protected abstract DiploidGenotype getBestGenotype(); + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + public String getBases() { + DiploidGenotype g = getBestGenotype(); + if ( alt != NO_CONSTRAINT && ((g.base1 != ref && g.base1 != alt) || (g.base2 != ref && g.base2 != alt)) ) + throw new IllegalStateException("The best genotype " + g + " is composed of an allele that is not " + ref + " or " + alt); + return g.toString(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index e7966d258..af883c47f 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -13,7 +13,7 @@ import java.util.Arrays; *

* The implementation of the genotype interface, specific to Geli */ -public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked { +public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, PosteriorsBacked { private final char mRefBase; private final GenomeLoc mLocation; @@ -33,6 +33,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked * @param loc the genome loc */ public GeliGenotypeCall(char ref, GenomeLoc loc) { + super(ref); mRefBase = ref; mLocation = loc; @@ -42,6 +43,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked } public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) { + super(ref); mRefBase = ref; mLocation = loc; mBestGenotype = DiploidGenotype.valueOf(genotype); @@ -99,10 +101,30 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked private void lazyEval() { if (mBestGenotype == null) { - Integer sorted[] = Utils.SortPermutation(mPosteriors); - mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; - mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; - mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference()); + char ref = this.getReference(); + char alt = this.getAlternateAllele(); + + mRefGenotype = DiploidGenotype.createHomGenotype(ref); + + // if we are constrained to a single alternate allele, use only that one + if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) { + DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref)); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); + boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()]; + boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; + boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; + if ( hetOverHom ) { + mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype); + mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype)); + } else { + mBestGenotype = (homOverRef ? homGenotype : mRefGenotype); + mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype)); + } + } else { + Integer sorted[] = Utils.SortPermutation(mPosteriors); + mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + } } } @@ -117,7 +139,7 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked } // get the best genotype - private DiploidGenotype getBestGenotype() { + protected DiploidGenotype getBestGenotype() { lazyEval(); return mBestGenotype; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index beafa999b..0ee440021 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -13,7 +13,7 @@ import java.util.Arrays; *

* The implementation of the genotype interface, specific to VCF */ -public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, SampleBacked { +public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, SampleBacked { private final char mRefBase; private final GenomeLoc mLocation; @@ -31,6 +31,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, private String mSampleName; public VCFGenotypeCall(char ref, GenomeLoc loc) { + super(ref); mRefBase = ref; mLocation = loc; @@ -41,6 +42,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, } public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, int coverage, String sample) { + super(ref); mRefBase = ref; mLocation = loc; mBestGenotype = DiploidGenotype.unorderedValueOf(genotype); @@ -103,14 +105,33 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, private void lazyEval() { if (mBestGenotype == null) { - Integer sorted[] = Utils.SortPermutation(mPosteriors); - mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; - mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; - mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference()); + char ref = this.getReference(); + char alt = this.getAlternateAllele(); + + mRefGenotype = DiploidGenotype.createHomGenotype(ref); + + // if we are constrained to a single alternate allele, use only that one + if ( alt != AlleleConstrainedGenotype.NO_CONSTRAINT ) { + DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref)); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); + boolean hetOverHom = mPosteriors[hetGenotype.ordinal()] > mPosteriors[homGenotype.ordinal()]; + boolean hetOverRef = mPosteriors[hetGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; + boolean homOverRef = mPosteriors[homGenotype.ordinal()] > mPosteriors[mRefGenotype.ordinal()]; + if ( hetOverHom ) { + mBestGenotype = (hetOverRef ? hetGenotype : mRefGenotype); + mNextGenotype = (!hetOverRef ? hetGenotype : (homOverRef ? homGenotype : mRefGenotype)); + } else { + mBestGenotype = (homOverRef ? homGenotype : mRefGenotype); + mNextGenotype = (!homOverRef ? homGenotype : (hetOverRef ? hetGenotype : mRefGenotype)); + } + } else { + Integer sorted[] = Utils.SortPermutation(mPosteriors); + mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + } } } - /** * get the confidence we have * @@ -121,7 +142,7 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, } // get the best genotype - private DiploidGenotype getBestGenotype() { + protected DiploidGenotype getBestGenotype() { lazyEval(); return mBestGenotype; } @@ -138,15 +159,6 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, return mRefGenotype; } - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - public String getBases() { - return getBestGenotype().toString(); - } - /** * get the ploidy * diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 9c27c46af..f30790853 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("e4d32d41544fc12c1553f780be3fd272")); + Arrays.asList("3a33873d7284cc113a639ee12c925ac0")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("d0bfdedca0944a0b05d53f1a75408a57")); + Arrays.asList("bcdb54d8867543c5c316b65b27b0d3d6")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("e2e4c147075186245d8eb1f8d3bb8705")); + Arrays.asList("300efc0306ae89e38b67c77175fe828d")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); }