-Refactored JointEstimation code so that pooled calling will work

-Use the phred scale for the Fisher strand test
-Use only 2N allele frequency estimation points



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2059 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-17 20:21:15 +00:00
parent 43bd4c8e8f
commit a048f5cdf1
10 changed files with 225 additions and 159 deletions

View File

@ -31,7 +31,7 @@ public class FisherStrand implements VariantAnnotation {
if ( pvalue == null )
return null;
return new Pair<String, String>("FisherStrand", String.format("%.4f", pvalue));
return new Pair<String, String>("FisherStrand", String.format("%.1f", -10.0 * Math.log10(pvalue)));
}
public boolean useZeroQualityReads() { return false; }

View File

@ -0,0 +1,88 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel {

    protected DiploidGenotypeCalculationModel() {}

    // the per-sample GenotypeLikelihoods map; repopulated on every call to initializeLikelihoods()
    private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();

    /**
     * Diploid calling treats each sample independently, so use the standard
     * per-sample split of the alignment context.
     */
    protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
        return splitContextBySample(context);
    }

    /**
     * Builds a GenotypeLikelihoods object from each sample's pileup at this locus.
     * Flat genotype priors are used here; the allele-frequency-dependent priors are
     * applied later by the joint estimation machinery in the superclass.
     *
     * @param ref         the reference base
     * @param contexts    per-sample alignment contexts
     * @param contextType which stratification (overall / forward / reverse) to use
     */
    protected void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
        GLs.clear();

        // use flat priors for GLs
        DiploidGenotypePriors priors = new DiploidGenotypePriors();

        for ( String sample : contexts.keySet() ) {
            AlignmentContextBySample context = contexts.get(sample);
            ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));

            // create the GenotypeLikelihoods object
            GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
            GL.add(pileup, true);
            GLs.put(sample, GL);
        }
    }

    /**
     * Computes log10 P(D | AF = f) summed over all samples, weighting each sample's
     * normalized ref/het/hom posteriors by the Hardy-Weinberg genotype frequencies
     * implied by allele frequency f.
     *
     * @param refGenotype the hom-ref genotype
     * @param hetGenotype the ref/alt het genotype
     * @param homGenotype the hom-alt genotype
     * @param f           the candidate alternate allele frequency
     * @return the summed per-sample log10 probability of the data given f
     */
    protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) {
        double PofDgivenAFi = 0.0;

        // the Hardy-Weinberg values depend only on f, not on the sample, so compute
        // them once here instead of once per sample inside the loop
        double[] HWvalues = getHardyWeinbergValues(f);

        // for each sample
        for ( GenotypeLikelihoods GL : GLs.values() ) {
            double[] posteriors = GL.getPosteriors();
            double[] allelePosteriors = new double[] { posteriors[refGenotype.ordinal()], posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
            allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);

            // calculate the posterior weighted frequencies
            double samplePofDgivenAFi = 0.0;
            samplePofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
            samplePofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
            samplePofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
            PofDgivenAFi += Math.log10(samplePofDgivenAFi);
        }

        return PofDgivenAFi;
    }

    /**
     * Builds one Genotype call per sample, attaching the pileup, sample name,
     * likelihoods, and posteriors for whichever of those the output format supports.
     *
     * @param ref      the reference base
     * @param contexts per-sample alignment contexts
     * @param loc      the genomic location of the call
     * @return the list of per-sample calls
     */
    protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
        ArrayList<Genotype> calls = new ArrayList<Genotype>();

        // iterate over entries so each sample's GenotypeLikelihoods is looked up only once
        for ( Map.Entry<String, GenotypeLikelihoods> entry : GLs.entrySet() ) {
            String sample = entry.getKey();
            GenotypeLikelihoods GL = entry.getValue();

            // create the call
            Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);

            if ( call instanceof ReadBacked ) {
                ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
                ((ReadBacked)call).setPileup(pileup);
            }
            if ( call instanceof SampleBacked ) {
                ((SampleBacked)call).setSampleName(sample);
            }
            if ( call instanceof LikelihoodsBacked ) {
                ((LikelihoodsBacked)call).setLikelihoods(GL.getLikelihoods());
            }
            if ( call instanceof PosteriorsBacked ) {
                ((PosteriorsBacked)call).setPosteriors(GL.getPosteriors());
            }

            calls.add(call);
        }

        return calls;
    }
}

View File

@ -23,7 +23,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {
public enum Model {
EM_POINT_ESTIMATE,
JOINT_ESTIMATE
JOINT_ESTIMATE,
POOLED
}
protected BaseMismatchModel baseModel;
@ -34,7 +35,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT;
protected boolean ALL_BASE_MODE;
protected boolean GENOTYPE_MODE;
protected boolean POOLED_INPUT;
protected int POOL_SIZE;
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
@ -67,7 +67,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
OUTPUT_FORMAT = UAC.VAR_FORMAT;
ALL_BASE_MODE = UAC.ALL_BASES;
GENOTYPE_MODE = UAC.GENOTYPE;
POOLED_INPUT = UAC.POOLED;
POOL_SIZE = UAC.POOLSIZE;
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
@ -144,19 +143,15 @@ public abstract class GenotypeCalculationModel implements Cloneable {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
// find the sample; special case for pools
// find the sample
String sample;
if ( POOLED_INPUT ) {
sample = "POOL";
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
if ( assumedSingleSample == null )
throw new StingException("Missing read group for read " + read.getReadName());
sample = assumedSingleSample;
} else {
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
if ( assumedSingleSample == null )
throw new StingException("Missing read group for read " + read.getReadName());
sample = assumedSingleSample;
} else {
sample = readGroup.getSample();
}
sample = readGroup.getSample();
}
// create a new context object if this is the first time we're seeing a read for this sample

View File

@ -59,7 +59,10 @@ public class GenotypeCalculationModelFactory {
gcm = new PointEstimateGenotypeCalculationModel();
break;
case JOINT_ESTIMATE:
gcm = new JointEstimateGenotypeCalculationModel();
gcm = new DiploidGenotypeCalculationModel();
break;
case POOLED:
gcm = new PooledCalculationModel();
break;
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
}

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {
protected JointEstimateGenotypeCalculationModel() {}
@ -20,63 +20,53 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
// we cache the results to avoid having to recompute everything
private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();
protected enum GenotypeType { REF, HET, HOM }
// the allele frequency priors
private double[] log10AlleleFrequencyPriors;
protected double[] log10AlleleFrequencyPriors;
// the allele frequency posteriors and P(f>0) for each alternate allele
private double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][];
private double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][];
private double[] PofFs = new double[BaseUtils.BASES.length];
protected double[][] alleleFrequencyPosteriors = new double[BaseUtils.BASES.length][];
protected double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][];
protected double[] PofFs = new double[BaseUtils.BASES.length];
// the minimum and actual number of points in our allele frequency estimation
private static final int MIN_ESTIMATION_POINTS = 100;
private int frequencyEstimationPoints;
// the GenotypeLikelihoods map
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
private enum GenotypeType { REF, HET, HOM }
// these need to be implemented
protected abstract HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context);
protected abstract void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType);
protected abstract double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f);
protected abstract List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc);
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
HashMap<String, AlignmentContextBySample> contexts = createContexts(context);
if ( contexts == null )
return null;
// todo -- eric, can you refactor this into a superclass the manages the i over 2n calculation?
//
// The only thing that's different from pools and msg should be the calls to
// initializeGenotypeLikelihoods and calculateAlleleFrequencyPosteriors
// There's a lot of functionality here that can be separated into a superclass that requires
// a few methods be overridden and it'll magically work for all data types, etc.
//
// Here are some examples:
// getNumberSamples() ->
// likelihoodsOfDGivenF() -> for MSG, pools, etc -- see below
//
int numSamples = contexts.size();
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)
initializeAlleleFrequencies(contexts.size());
initializeAlleleFrequencies(frequencyEstimationPoints);
// run joint estimation for the full GL contexts
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL);
calculateAlleleFrequencyPosteriors(ref, context.getLocation());
return createCalls(tracker, ref, contexts, context.getLocation());
}
initializeLikelihoods(ref, contexts, StratifiedContext.OVERALL);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
calculatePofFs(ref, frequencyEstimationPoints);
private void initializeAlleleFrequencies(int numSamples) {
// print out stats if we have a writer
if ( verboseWriter != null )
printAlleleFrequencyData(ref, context.getLocation(), frequencyEstimationPoints);
// calculate the number of estimation points to use:
// it's either MIN_ESTIMATION_POINTS or 2N if that's larger
// (add 1 for allele frequency of zero)
frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
}
private void initializeAlleleFrequencies(int frequencyEstimationPoints) {
// set up the allele frequency priors
log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints);
}
private double[] getNullAlleleFrequencyPriors(int N) {
protected double[] getNullAlleleFrequencyPriors(int N) {
double[] AFs = nullAlleleFrequencyCache.get(N);
// if it hasn't been calculated yet, do so now
@ -108,24 +98,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
return AFs;
}
private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
GLs.clear();
// use flat priors for GLs
DiploidGenotypePriors priors = new DiploidGenotypePriors();
for ( String sample : contexts.keySet() ) {
AlignmentContextBySample context = contexts.get(sample);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
GL.add(pileup, true);
GLs.put(sample, GL);
}
}
private void calculateAlleleFrequencyPosteriors(char ref, GenomeLoc verboseLocation) {
protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints) {
// initialization
for ( char altAllele : BaseUtils.BASES ) {
@ -134,53 +107,27 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints];
}
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
String refStr = String.valueOf(ref);
//
// todo -- if you invert this loop
// foreach frequnecy
// foreach altAllele
//
// then the pooled vs. msg calculation is just a function call difference
// for each alternate allele
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
// for each minor allele frequency
for (int i = 0; i < frequencyEstimationPoints; i++) {
double f = (double)i / (double)(frequencyEstimationPoints-1);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
// for each sample
for ( GenotypeLikelihoods GL : GLs.values() ) {
DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);
double[] posteriors = GL.getPosteriors();
// get the ref data
double refPosterior = posteriors[refGenotype.ordinal()];
String refStr = String.valueOf(ref);
// for each alternate allele
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele);
double[] allelePosteriors = new double[] { refPosterior, posteriors[hetGenotype.ordinal()], posteriors[homGenotype.ordinal()] };
allelePosteriors = MathUtils.normalizeFromLog10(allelePosteriors);
//logger.debug("Normalized posteriors for " + altAllele + ": " + allelePosteriors[0] + " " + allelePosteriors[1] + " " + allelePosteriors[2]);
// calculate the posterior weighted frequencies
double[] HWvalues = getHardyWeinbergValues(f);
double PofDgivenAFi = 0.0;
PofDgivenAFi += HWvalues[GenotypeType.REF.ordinal()] * allelePosteriors[GenotypeType.REF.ordinal()];
PofDgivenAFi += HWvalues[GenotypeType.HET.ordinal()] * allelePosteriors[GenotypeType.HET.ordinal()];
PofDgivenAFi += HWvalues[GenotypeType.HOM.ordinal()] * allelePosteriors[GenotypeType.HOM.ordinal()];
log10PofDgivenAFi[baseIndex][i] += Math.log10(PofDgivenAFi);
}
// for each minor allele frequency
for (int i = 0; i < frequencyEstimationPoints; i++) {
double f = (double)i / (double)(frequencyEstimationPoints-1);
log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(refGenotype, hetGenotype, homGenotype, f);
}
}
}
// todo -- shouldn't this be in a function for clarity?
protected void calculatePofFs(char ref, int frequencyEstimationPoints) {
// for each alternate allele
for ( char altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
@ -199,13 +146,9 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
sum += alleleFrequencyPosteriors[baseIndex][i];
PofFs[baseIndex] = Math.min(sum, 1.0); // deal with precision errors
}
// print out stats if we have a position and a writer
if ( verboseLocation != null && verboseWriter != null )
printAlleleFrequencyData(ref, verboseLocation);
}
private double[] getHardyWeinbergValues(double f) {
protected double[] getHardyWeinbergValues(double f) {
double[] HWvalues = hardyWeinbergValueCache.get(f);
// if it hasn't been calculated yet, do so now
@ -231,7 +174,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
return HWvalues;
}
private void printAlleleFrequencyData(char ref, GenomeLoc loc) {
protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {
verboseWriter.println("Location=" + loc + ", ref=" + ref);
StringBuilder header = new StringBuilder("MAF\tNullAFpriors\t");
@ -267,7 +210,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
verboseWriter.println();
}
private Pair<List<Genotype>, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
protected Pair<List<Genotype>, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
// first, find the alt allele with maximum confidence
int indexOfMax = 0;
char baseOfMax = ref;
@ -290,31 +233,8 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
ArrayList<Genotype> calls = new ArrayList<Genotype>();
// first, populate the sample-specific data
for ( String sample : GLs.keySet() ) {
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {
((SampleBacked)call).setSampleName(sample);
}
if ( call instanceof LikelihoodsBacked ) {
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
}
if ( call instanceof PosteriorsBacked ) {
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
}
calls.add(call);
}
// populate the sample-specific data
List<Genotype> calls = makeGenotypeCalls(ref, contexts, loc);
// next, the general locus data
// note that calculating strand bias involves overwriting data structures, so we do that last
@ -340,14 +260,16 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
double lod = overallLog10PofF - overallLog10PofNull;
// the forward lod
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.FORWARD);
calculateAlleleFrequencyPosteriors(ref, null);
initializeLikelihoods(ref, contexts, StratifiedContext.FORWARD);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
calculatePofFs(ref, frequencyEstimationPoints);
double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double forwardLog10PofF = Math.log10(PofFs[indexOfMax]);
// the reverse lod
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.REVERSE);
calculateAlleleFrequencyPosteriors(ref, null);
initializeLikelihoods(ref, contexts, StratifiedContext.REVERSE);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints);
calculatePofFs(ref, frequencyEstimationPoints);
double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double reverseLog10PofF = Math.log10(PofFs[indexOfMax]);

View File

@ -0,0 +1,61 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import net.sf.samtools.SAMRecord;
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {

    protected PooledCalculationModel() {}

    /**
     * Collapses every read at this locus into a single pseudo-sample keyed "POOL".
     *
     * @param context the full alignment context at this locus
     * @return a single-entry map from "POOL" to the combined context, or null if the
     *         pileup contains more than the allowed number of deletions
     */
    protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
        // for testing purposes, we may want to throw multi-samples at pooled mode,
        // so we can't use the standard splitContextBySample() method here
        AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation());

        List<SAMRecord> reads = context.getReads();
        List<Integer> offsets = context.getOffsets();
        int deletionCount = 0;
        int nReads = reads.size();

        for (int readIndex = 0; readIndex < nReads; readIndex++) {
            int offset = offsets.get(readIndex);

            // an offset of -1 marks a deletion; give up if the pileup has too many
            // (a negative maxDeletionsInPileup means "no limit")
            if ( offset == -1 ) {
                deletionCount++;
                if ( maxDeletionsInPileup >= 0 && deletionCount > maxDeletionsInPileup )
                    return null;
            }

            // add the read to the pooled context; note that bad bases are included
            // too (for DoC calculations later)
            pooledContext.add(reads.get(readIndex), offset);
        }

        HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
        contexts.put("POOL", pooledContext);
        return contexts;
    }

    /**
     * Pooled mode keeps no per-sample likelihood state, so there is nothing to set up.
     */
    protected void initializeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
        // intentionally empty
    }

    /**
     * Placeholder for the pooled likelihood of the data at allele frequency f.
     */
    protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) {
        // TODO -- IMPLEMENT ME
        return 0.0;
    }

    /**
     * Pooled data carries no per-individual genotype information, so no calls are made.
     */
    protected List<Genotype> makeGenotypeCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
        // no genotypes in pooled mode
        return new ArrayList<Genotype>();
    }
}

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
public class UnifiedArgumentCollection {
// control the various models to be used
@Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE is under development. An exception will be thrown if an attempt is made to use EM_POINT_ESTIMATE with pooled data.", required = false)
@Argument(fullName = "genotype_model", shortName = "gm", doc = "Genotype calculation model to employ -- EM_POINT_ESTIMATE is currently the default, while JOINT_ESTIMATE and POOLED are under development.", required = false)
public GenotypeCalculationModel.Model genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;
@Argument(fullName = "base_model", shortName = "bm", doc = "Base substitution model to employ -- EMPIRICAL is the recommended default, but it's possible to select the ONE_STATE and THREE_STATE models for comparison purposes", required = false)
@ -41,10 +41,7 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = DiploidGenotypePriors.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "pooled", shortName = "pooled", doc = "Does the input bam represent pooled data (so that genotypes can't be called)?", required = false)
public boolean POOLED = false;
@Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool", required = false)
@Argument(fullName = "poolSize", shortName = "ps", doc = "Number of individuals in the pool (for POOLED model only)", required = false)
public int POOLSIZE = 0;

View File

@ -94,8 +94,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
public void initialize() {
// deal with input errors
if ( UAC.POOLED && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
throw new StingException("This was an attempt to use an EM Point Estimate model with pooled genotype calculations. This model does not work with pooled data.");
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
throw new StingException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
}
if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) {
StringBuilder sb = new StringBuilder();

View File

@ -7,7 +7,7 @@ import java.util.Arrays;
public class VariantAnnotatorIntegrationTest extends WalkerTest {
public static String baseTestString() {
return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 20:10,000,000-10,010,000 -vcf %s";
return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,010,000 -vcf %s";
}
@Test

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("b992e55996942c893948ea85660478ba"));
Arrays.asList("37e112d619c2b6f213554edd10262523"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("90c5129f298075ee0e18233b3763f25d"));
Arrays.asList("e4d32d41544fc12c1553f780be3fd272"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("033390940ecc0e2dcda5559d6a1802fa"));
Arrays.asList("d0bfdedca0944a0b05d53f1a75408a57"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("8deb8b1132e7ddf28c7a0d919ce22985"));
Arrays.asList("e2e4c147075186245d8eb1f8d3bb8705"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}