diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index a47e417c4..0769c8749 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -112,6 +112,15 @@ public class StandardCallerArgumentCollection { @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 6; + /** + * By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus. + * If This argument is true, the heterozygosity prior will not be used - main application is for population studies where prior might not be appropriate, + * as for example when the ancestral status of the reference allele is not known. + */ + @Advanced + @Argument(fullName = "dont_use_site_prior", shortName = "noPrior", doc = "If true, skip prior for variant discovery", required = false) + public boolean ignoreHeterozygosityPrior = false; + /** * If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads. * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we @@ -180,5 +189,6 @@ public class StandardCallerArgumentCollection { this.exactCallsLog = SCAC.exactCallsLog; this.sampleContamination=SCAC.sampleContamination; this.AFmodel = SCAC.AFmodel; + this.ignoreHeterozygosityPrior = SCAC.ignoreHeterozygosityPrior; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index ede0741ff..1d0c10795 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -159,8 +159,8 @@ public class UnifiedGenotyperEngine { this.N = samples.size() * ploidy; log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1]; - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity, UAC.ignoreHeterozygosityPrior); + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY, UAC.ignoreHeterozygosityPrior); filter.add(LOW_QUAL_FILTER_NAME); @@ -722,8 +722,20 @@ public class UnifiedGenotyperEngine { return GGAmodel; } - public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) { + /** + * Function that fills vector with allele frequency priors. By default, infinite-sites, neutral variation prior is used, + * where Pr(AC=i) = theta/i where theta is heterozygosity + * @param N Number of chromosomes + * @param priors (output) array to be filled with priors + * @param theta Heterozygosity + * @param ignorePriors If true, priors are ignored and zeros returned + */ + public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta, final boolean ignorePriors) { + if (ignorePriors) { + Arrays.fill(priors, 0,N,0.0); + return; + } double sum = 0.0; // for each i @@ -733,6 +745,10 @@ public class UnifiedGenotyperEngine { sum += value; } + // protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions) + if (sum > 1.0) { + throw new UserException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed - try reducing heterozygosity value or using the -noPrior argument"); + } // null frequency for AF=0 is (1 - sum(all other frequencies)) priors[0] = Math.log10(1.0 - sum); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java index a66a5580c..a4224bf6c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java @@ -111,7 +111,7 @@ public class AFCalcTestBuilder { return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors case human: final double[] humanPriors = new double[nPriorValues]; - UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001); + UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, false); return humanPriors; default: throw new RuntimeException("Unexpected type " + priorType); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index ca965a042..a0440aaed 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -139,6 +139,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test confidence 1", spec1); } + @Test + public void testNoPrior() { + WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -noPrior", 1, + Arrays.asList("422656266117f8d01e17e5c491c49a24")); + executeTest("test no prior 1", spec1); + + } + // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index c4f5befcf..5eebe9670 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -176,7 +176,7 @@ public class AFCalcUnitTest extends BaseTest { final int nPriorValues = 2*nSamples+1; final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors final double[] humanPriors = new double[nPriorValues]; - UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001); + UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, false); for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) { for ( AFCalc model : calcs ) { @@ -575,6 +575,35 @@ public class AFCalcUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } + + @Test(enabled = true, dataProvider = "Models") + public void testNoPrior(final AFCalc model) { + for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { + final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); + + final double[] flatPriors = new double[]{0.0,0.0,0.0}; + final double[] noPriors = new double[3]; + // test that function computeAlleleFrequency correctly operates when the -noPrior option is set + UnifiedGenotyperEngine.computeAlleleFrequencyPriors(2, noPriors, 0.001, true); + + GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior"); + GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "noPrior"); + final AFCalcResult resultTrackerFlat = cfgFlatPrior.execute(); + final AFCalcResult resultTrackerNoPrior = cfgNoPrior.execute(); + + final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0]; + final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5); + final double nonRefPost = Math.pow(10, pHetWithNoPrior) / (Math.pow(10, pRefWithNoPrior) + Math.pow(10, pHetWithNoPrior)); + final double log10NonRefPost = Math.log10(nonRefPost); + + if ( ! Double.isInfinite(log10NonRefPost) ) { + // check that the no-prior and flat-prior constructions yield same result + Assert.assertEquals(resultTrackerFlat.getLog10PosteriorOfAFGT0(), resultTrackerNoPrior.getLog10PosteriorOfAFGT0()); + } + + } + } + @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") public void testBiallelicPriors(final AFCalc model) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadLengthFilter.java similarity index 79% rename from public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/ReadLengthFilter.java index df1c11a2b..80224b786 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReadLengthFilter.java @@ -29,18 +29,20 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; /** - * Filters out reads whose length is >= some value. + * Filters out reads whose length is >= some value or < some value. * * @author mhanna * @version 0.1 */ -public class MaxReadLengthFilter extends ReadFilter { +public class ReadLengthFilter extends ReadFilter { @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true) private int maxReadLength; - + + @Argument(fullName = "minReadLength", shortName = "minRead", doc="Discard reads with length shorter than the specified value", required=true) + private int minReadLength = 1; public boolean filterOut(SAMRecord read) { // check the length - return read.getReadLength() > maxReadLength; + return read.getReadLength() > maxReadLength || read.getReadLength() < minReadLength; } }