diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 28634d9bb..2031a8b85 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -31,7 +31,7 @@ public class FisherStrand implements VariantAnnotation { if ( pvalue == null ) return null; - return new Pair("FisherStrand", String.format("%.4f", pvalue)); + return new Pair("FisherStrand", String.format("%.1f", -10.0 * Math.log10(pvalue))); } public boolean useZeroQualityReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java new file mode 100755 index 000000000..179dcfb75 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.util.*; + +public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel { + + protected DiploidGenotypeCalculationModel() {} + + // the GenotypeLikelihoods map + private HashMap GLs = new HashMap(); + + + protected HashMap createContexts(AlignmentContext context) { + return splitContextBySample(context); + } + + protected void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType) { + GLs.clear(); + + // use flat priors for GLs + DiploidGenotypePriors priors = new DiploidGenotypePriors(); + + for ( String sample : contexts.keySet() ) { + AlignmentContextBySample context = contexts.get(sample); + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + + // create the GenotypeLikelihoods object + GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); + GL.add(pileup, true); + GLs.put(sample, GL); + } + } + + protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) { + double PofDgivenAFi = 0.0; + + // for each sample + for ( GenotypeLikelihoods GL : GLs.values() ) { + + double[] posteriors = GL.getPosteriors(); + + double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] }; + allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors); + + // calculate the posterior weighted frequencies + double[] HWvalues = getHardyWeinbergValues(f); + double samplePofDgivenAFi = 0.0; + samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()]; + samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()]; + samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()]; + PofDgivenAFi += Math.log10(samplePofDgivenAFi); + } + + return PofDgivenAFi; + } + + protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { + ArrayList calls = new ArrayList(); + + for ( String sample : GLs.keySet() ) { + + // create the call + Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc); + + if ( call instanceof ReadBacked ) { + ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL)); + ((ReadBacked)call).setPileup(pileup); + } + if ( call instanceof SampleBacked ) { + ((SampleBacked)call).setSampleName(sample); + } + if ( call instanceof LikelihoodsBacked ) { + ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods()); + } + if ( call instanceof PosteriorsBacked ) { + ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); + } + + calls.add(call); + } + + return calls; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index ab32d7a2f..27c9d938a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -23,7 +23,8 @@ public abstract class GenotypeCalculationModel implements Cloneable { public enum Model { EM_POINT_ESTIMATE, - JOINT_ESTIMATE + JOINT_ESTIMATE, + POOLED } protected BaseMismatchModel baseModel; @@ -34,7 +35,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT; protected boolean ALL_BASE_MODE; protected boolean GENOTYPE_MODE; - protected boolean POOLED_INPUT; protected int POOL_SIZE; protected double CONFIDENCE_THRESHOLD; protected double MINIMUM_ALLELE_FREQUENCY; @@ -67,7 +67,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { OUTPUT_FORMAT = UAC.VAR_FORMAT; ALL_BASE_MODE = UAC.ALL_BASES; GENOTYPE_MODE = UAC.GENOTYPE; - POOLED_INPUT = UAC.POOLED; POOL_SIZE = UAC.POOLSIZE; CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; @@ -144,19 +143,15 @@ public abstract class GenotypeCalculationModel implements Cloneable { SAMRecord read = reads.get(i); int offset = offsets.get(i); - // find the sample; special case for pools + // find the sample String sample; - if ( POOLED_INPUT ) { - sample = "POOL"; + SAMReadGroupRecord readGroup = read.getReadGroup(); + if ( readGroup == null ) { + if ( assumedSingleSample == null ) + throw new StingException("Missing read group for read " + read.getReadName()); + sample = assumedSingleSample; } else { - SAMReadGroupRecord readGroup = read.getReadGroup(); - if ( readGroup == null ) { - if ( assumedSingleSample == null ) - throw new StingException("Missing read group for read " + read.getReadName()); - sample = assumedSingleSample; - } else { - sample = readGroup.getSample(); - } + sample = readGroup.getSample(); } // create a new context object if this is the first time we're seeing a read for this sample diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index d89e3c77c..68ac7ddc3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -59,7 +59,10 @@ public class GenotypeCalculationModelFactory { gcm = new PointEstimateGenotypeCalculationModel(); break; case JOINT_ESTIMATE: - gcm = new JointEstimateGenotypeCalculationModel(); + gcm = new DiploidGenotypeCalculationModel(); + break; + case POOLED: + gcm = new PooledCalculationModel(); break; default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 140e49b10..1139486e3 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; -public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { +public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { protected JointEstimateGenotypeCalculationModel() {} @@ -20,63 +20,53 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // we cache the results to avoid having to recompute everything private HashMap hardyWeinbergValueCache = new HashMap(); + protected enum GenotypeType { REF, HET, HOM } + // the allele frequency priors - private double[] log10AlleleFrequencyPriors; + protected double[] log10AlleleFrequencyPriors; // the allele frequency posteriors and P(f>0) for each alternate allele - private double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][]; - private double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][]; - private double[] PofFs = new double[BaseUtils.BASES.length]; + protected double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][]; + protected double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][]; + protected double[] PofFs = new double[BaseUtils.BASES.length]; - // the minimum and actual number of points in our allele frequency estimation - private static final int MIN_ESTIMATION_POINTS = 100; - private int frequencyEstimationPoints; - - // the GenotypeLikelihoods map - private HashMap GLs = new HashMap(); - - private enum GenotypeType { REF, HET, HOM } + + // these need to be implemented + protected abstract HashMap createContexts(AlignmentContext context); + protected abstract void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType); + protected abstract double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f); + protected abstract List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc); public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand - HashMap contexts = splitContextBySample(context); + HashMap contexts = createContexts(context); if ( contexts == null ) return null; - // todo -- eric, can you refactor this into a superclass the manages the i over 2n calculation? - // - // The only thing that's different from pools and msg should be the calls to - // initializeGenotypeLikelihoods and calculateAlleleFrequencyPosteriors - // There's a lot of functionality here that can be separated into a superclass that requires - // a few methods be overridden and it'll magically work for all data types, etc. - // - // Here are some examples: - // getNumberSamples() -> - // likelihoodsOfDGivenF() -> for MSG, pools, etc -- see below - // + int numSamples = contexts.size(); + int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero) - initializeAlleleFrequencies(contexts.size()); + initializeAlleleFrequencies(frequencyEstimationPoints); - // run joint estimation for the full GL contexts - initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL); - calculateAlleleFrequencyPosteriors(ref, context.getLocation()); - return createCalls(tracker, ref, contexts, context.getLocation()); - } + initializeLikelihoods(ref, contexts, StratifiedContext.OVERALL); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + calculatePofFs(ref, frequencyEstimationPoints); - private void initializeAlleleFrequencies(int numSamples) { + // print out stats if we have a writer + if ( verboseWriter != null ) + printAlleleFrequencyData(ref, context.getLocation(), frequencyEstimationPoints); - // calculate the number of estimation points to use: - // it's either MIN_ESTIMATION_POINTS or 2N if that's larger - // (add 1 for allele frequency of zero) - frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1; + return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); + } + private void initializeAlleleFrequencies(int frequencyEstimationPoints) { // set up the allele frequency priors log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints); } - private double[] getNullAlleleFrequencyPriors(int N) { + protected double[] getNullAlleleFrequencyPriors(int N) { double[] AFs = nullAlleleFrequencyCache.get(N); // if it hasn't been calculated yet, do so now @@ -108,24 +98,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo return AFs; } - private void initializeGenotypeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType) { - GLs.clear(); - - // use flat priors for GLs - DiploidGenotypePriors priors = new DiploidGenotypePriors(); - - for ( String sample : contexts.keySet() ) { - AlignmentContextBySample context = contexts.get(sample); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); - - // create the GenotypeLikelihoods object - GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); - GL.add(pileup, true); - GLs.put(sample, GL); - } - } - - private void calculateAlleleFrequencyPosteriors(char ref, GenomeLoc verboseLocation) { + protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints) { // initialization for ( char altAllele : BaseUtils.BASES ) { @@ -134,53 +107,27 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints]; } DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); + String refStr = String.valueOf(ref); - // - // todo -- if you invert this loop - // foreach frequnecy - // foreach altAllele - // - // then the pooled vs. msg calculation is just a function call difference + // for each alternate allele + for ( char altAllele : BaseUtils.BASES ) { + if ( altAllele == ref ) + continue; - // for each minor allele frequency - for (int i = 0; i < frequencyEstimationPoints; i++) { - double f = (double)i / (double)(frequencyEstimationPoints-1); + int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - // for each sample - for ( GenotypeLikelihoods GL : GLs.values() ) { + DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele); - double[] posteriors = GL.getPosteriors(); - - // get the ref data - double refPosterior = posteriors[refGenotype.ordinal()]; - String refStr = String.valueOf(ref); - - // for each alternate allele - for ( char altAllele : BaseUtils.BASES ) { - if ( altAllele == ref ) - continue; - - int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - - DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele); - - double[] allelePosteriors = new double[] { refPosterior, posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] }; - allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors); - //logger.debug("Normalized posteriors for " + altAllele + ": " + allelePosteriors[0] + " " + allelePosteriors[1] + " " + allelePosteriors[2]); - - // calculate the posterior weighted frequencies - double[] HWvalues = getHardyWeinbergValues(f); - double PofDgivenAFi = 0.0; - PofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()]; - PofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()]; - PofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()]; - log10PofDgivenAFi[baseIndex][i] += Math.log10(PofDgivenAFi); - } + // for each minor allele frequency + for (int i = 0; i < frequencyEstimationPoints; i++) { + double f = (double)i / (double)(frequencyEstimationPoints-1); + log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(refGenotype, hetGenotype, homGenotype, f); } } + } - // todo -- shouldn't this be in a function for clarity? + protected void calculatePofFs(char ref, int frequencyEstimationPoints) { // for each alternate allele for ( char altAllele : BaseUtils.BASES ) { if ( altAllele == ref ) @@ -199,13 +146,9 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo sum += alleleFrequencyPosteriors[baseIndex][i]; PofFs[baseIndex] = Math.min(sum, 1.0); // deal with precision errors } - - // print out stats if we have a position and a writer - if ( verboseLocation != null && verboseWriter != null ) - printAlleleFrequencyData(ref, verboseLocation); } - private double[] getHardyWeinbergValues(double f) { + protected double[] getHardyWeinbergValues(double f) { double[] HWvalues = hardyWeinbergValueCache.get(f); // if it hasn't been calculated yet, do so now @@ -231,7 +174,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo return HWvalues; } - private void printAlleleFrequencyData(char ref, GenomeLoc loc) { + protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) { verboseWriter.println("Location=" + loc + ", ref=" + ref); StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t"); @@ -267,7 +210,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo verboseWriter.println(); } - private Pair, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc) { + protected Pair, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { // first, find the alt allele with maximum confidence int indexOfMax = 0; char baseOfMax = ref; @@ -290,31 +233,8 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) return new Pair, GenotypeLocusData>(null, null); - ArrayList calls = new ArrayList(); - - // first, populate the sample-specific data - for ( String sample : GLs.keySet() ) { - - // create the call - AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); - Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation()); - - if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL)); - ((ReadBacked)call).setPileup(pileup); - } - if ( call instanceof SampleBacked ) { - ((SampleBacked)call).setSampleName(sample); - } - if ( call instanceof LikelihoodsBacked ) { - ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods()); - } - if ( call instanceof PosteriorsBacked ) { - ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); - } - - calls.add(call); - } + // populate the sample-specific data + List calls = makeGenotypeCalls(ref, contexts, loc); // next, the general locus data // note that calculating strand bias involves overwriting data structures, so we do that last @@ -340,14 +260,16 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo double lod = overallLog10PofF - overallLog10PofNull; // the forward lod - initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.FORWARD); - calculateAlleleFrequencyPosteriors(ref, null); + initializeLikelihoods(ref, contexts, StratifiedContext.FORWARD); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + calculatePofFs(ref, frequencyEstimationPoints); double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); // the reverse lod - initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.REVERSE); - calculateAlleleFrequencyPosteriors(ref, null); + initializeLikelihoods(ref, contexts, StratifiedContext.REVERSE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + calculatePofFs(ref, frequencyEstimationPoints); double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java new file mode 100755 index 000000000..16ad12a24 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -0,0 +1,61 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.util.*; + +import net.sf.samtools.SAMRecord; + +public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel { + + protected PooledCalculationModel() {} + + + protected HashMap createContexts(AlignmentContext context) { + // for testing purposes, we may want to throw multi-samples at pooled mode, + // so we can't use the standard splitContextBySample() method here + + AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation()); + + int deletionsInPileup = 0; + List reads = context.getReads(); + List offsets = context.getOffsets(); + + for (int i = 0; i < reads.size(); i++) { + + // check for deletions + int offset = offsets.get(i); + if ( offset == -1 ) { + // are there too many deletions in the pileup? + if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 ) + return null; + } + + // add the read to this sample's context + // note that bad bases are added to the context (for DoC calculations later) + pooledContext.add(reads.get(i), offset); + } + + HashMap contexts = new HashMap(); + contexts.put("POOL", pooledContext); + return contexts; + } + + protected void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType) { + return; + } + + protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) { + + // TODO -- IMPLEMENT ME + + return 0.0; + } + + protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { + // no genotypes in pooled mode + return new ArrayList(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index bc1258e6d..6c9a9c2b7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; public class UnifiedArgumentCollection { // control the various models to be used - @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE is under development. An exception will be thrown if an attempt is made to use EM_POINT_ESTIMATE with pooled data.", required = false) + @Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE and POOLED are under development.", required = false) public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE; @Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false) @@ -41,10 +41,7 @@ public class UnifiedArgumentCollection { @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY; - @Argument(fullName = "pooled", shortName = "pooled", doc = "Does the input bam represent pooled data (so that genotypes can't be called)?", required = false) - public boolean POOLED = false; - - @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool", required = false) + @Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false) public int POOLSIZE = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index f79a14df2..0beae9c8d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -94,8 +94,8 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL public void initialize() { // deal with input errors - if ( UAC.POOLED && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) { - throw new StingException("This was an attempt to use an EM Point Estimate model with pooled genotype calculations. This model does not work with pooled data."); + if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) { + throw new StingException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED."); } if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) { StringBuilder sb = new StringBuilder(); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 94bb7f7b2..5d808d4d6 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -7,7 +7,7 @@ import java.util.Arrays; public class VariantAnnotatorIntegrationTest extends WalkerTest { public static String baseTestString() { - return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 20:10,000,000-10,010,000 -vcf %s"; + return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,010,000 -vcf %s"; } @Test diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 09548fa07..9c27c46af 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("b992e55996942c893948ea85660478ba")); + Arrays.asList("37e112d619c2b6f213554edd10262523")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("90c5129f298075ee0e18233b3763f25d")); + Arrays.asList("e4d32d41544fc12c1553f780be3fd272")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("033390940ecc0e2dcda5559d6a1802fa")); + Arrays.asList("d0bfdedca0944a0b05d53f1a75408a57")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("8deb8b1132e7ddf28c7a0d919ce22985")); + Arrays.asList("e2e4c147075186245d8eb1f8d3bb8705")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); }