Merge pull request #93 from broadinstitute/gda_ancient_dna
Two features useful for ancient DNA processing. Ancient DNA sequencing d...
This commit is contained in:
commit
508b58376c
|
|
@ -112,6 +112,15 @@ public class StandardCallerArgumentCollection {
|
|||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 6;
|
||||
|
||||
/**
|
||||
* By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus.
|
||||
* If This argument is true, the heterozygosity prior will not be used - main application is for population studies where prior might not be appropriate,
|
||||
* as for example when the ancestral status of the reference allele is not known.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "dont_use_site_prior", shortName = "noPrior", doc = "If true, skip prior for variant discovery", required = false)
|
||||
public boolean ignoreHeterozygosityPrior = false;
|
||||
|
||||
/**
|
||||
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
||||
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
|
||||
|
|
@ -180,5 +189,6 @@ public class StandardCallerArgumentCollection {
|
|||
this.exactCallsLog = SCAC.exactCallsLog;
|
||||
this.sampleContamination=SCAC.sampleContamination;
|
||||
this.AFmodel = SCAC.AFmodel;
|
||||
this.ignoreHeterozygosityPrior = SCAC.ignoreHeterozygosityPrior;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,8 +159,8 @@ public class UnifiedGenotyperEngine {
|
|||
this.N = samples.size() * ploidy;
|
||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity, UAC.ignoreHeterozygosityPrior);
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY, UAC.ignoreHeterozygosityPrior);
|
||||
|
||||
filter.add(LOW_QUAL_FILTER_NAME);
|
||||
|
||||
|
|
@ -722,8 +722,20 @@ public class UnifiedGenotyperEngine {
|
|||
return GGAmodel;
|
||||
}
|
||||
|
||||
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
||||
/**
|
||||
* Function that fills vector with allele frequency priors. By default, infinite-sites, neutral variation prior is used,
|
||||
* where Pr(AC=i) = theta/i where theta is heterozygosity
|
||||
* @param N Number of chromosomes
|
||||
* @param priors (output) array to be filled with priors
|
||||
* @param theta Heterozygosity
|
||||
* @param ignorePriors If true, priors are ignored and zeros returned
|
||||
*/
|
||||
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta, final boolean ignorePriors) {
|
||||
|
||||
if (ignorePriors) {
|
||||
Arrays.fill(priors, 0,N,0.0);
|
||||
return;
|
||||
}
|
||||
double sum = 0.0;
|
||||
|
||||
// for each i
|
||||
|
|
@ -733,6 +745,10 @@ public class UnifiedGenotyperEngine {
|
|||
sum += value;
|
||||
}
|
||||
|
||||
// protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions)
|
||||
if (sum > 1.0) {
|
||||
throw new UserException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed - try reducing heterozygosity value or using the -noPrior argument");
|
||||
}
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
priors[0] = Math.log10(1.0 - sum);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ public class AFCalcTestBuilder {
|
|||
return MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
case human:
|
||||
final double[] humanPriors = new double[nPriorValues];
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, false);
|
||||
return humanPriors;
|
||||
default:
|
||||
throw new RuntimeException("Unexpected type " + priorType);
|
||||
|
|
|
|||
|
|
@ -139,6 +139,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoPrior() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -noPrior", 1,
|
||||
Arrays.asList("422656266117f8d01e17e5c491c49a24"));
|
||||
executeTest("test no prior 1", spec1);
|
||||
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing heterozygosity
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
final int nPriorValues = 2*nSamples+1;
|
||||
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||
final double[] humanPriors = new double[nPriorValues];
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001);
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, false);
|
||||
|
||||
for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) {
|
||||
for ( AFCalc model : calcs ) {
|
||||
|
|
@ -575,6 +575,35 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = true, dataProvider = "Models")
|
||||
public void testNoPrior(final AFCalc model) {
|
||||
for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) {
|
||||
final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000);
|
||||
|
||||
final double[] flatPriors = new double[]{0.0,0.0,0.0};
|
||||
final double[] noPriors = new double[3];
|
||||
// test that function computeAlleleFrequency correctly operates when the -noPrior option is set
|
||||
UnifiedGenotyperEngine.computeAlleleFrequencyPriors(2, noPriors, 0.001, true);
|
||||
|
||||
GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior");
|
||||
GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "noPrior");
|
||||
final AFCalcResult resultTrackerFlat = cfgFlatPrior.execute();
|
||||
final AFCalcResult resultTrackerNoPrior = cfgNoPrior.execute();
|
||||
|
||||
final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0];
|
||||
final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5);
|
||||
final double nonRefPost = Math.pow(10, pHetWithNoPrior) / (Math.pow(10, pRefWithNoPrior) + Math.pow(10, pHetWithNoPrior));
|
||||
final double log10NonRefPost = Math.log10(nonRefPost);
|
||||
|
||||
if ( ! Double.isInfinite(log10NonRefPost) ) {
|
||||
// check that the no-prior and flat-prior constructions yield same result
|
||||
Assert.assertEquals(resultTrackerFlat.getLog10PosteriorOfAFGT0(), resultTrackerNoPrior.getLog10PosteriorOfAFGT0());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models")
|
||||
public void testBiallelicPriors(final AFCalc model) {
|
||||
|
||||
|
|
|
|||
|
|
@ -29,18 +29,20 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* Filters out reads whose length is >= some value.
|
||||
* Filters out reads whose length is >= some value or < some value.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class MaxReadLengthFilter extends ReadFilter {
|
||||
public class ReadLengthFilter extends ReadFilter {
|
||||
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true)
|
||||
private int maxReadLength;
|
||||
|
||||
|
||||
@Argument(fullName = "minReadLength", shortName = "minRead", doc="Discard reads with length shorter than the specified value", required=true)
|
||||
private int minReadLength = 1;
|
||||
public boolean filterOut(SAMRecord read) {
|
||||
// check the length
|
||||
return read.getReadLength() > maxReadLength;
|
||||
return read.getReadLength() > maxReadLength || read.getReadLength() < minReadLength;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue