A new power walker for the user to feed in a number of alleles. Call that number k. Output is:

Locus Power_for_k_alleles Power_for_k-2_alleles Power_for_k-2_alleles ... Power_for_1_allele This was a request from Jason Flannick & the T2DB group. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1786 348d0f76-0448-11de-a6fe-93d51630548a
2009-10-08 15:35:35 +00:00 · 2009-10-08 15:35:35 +00:00 · 11ff1e09b8
parent 04fe50cadd
commit 11ff1e09b8
1 changed files with 148 additions and 0 deletions
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java
@ -0,0 +1,148 @@
+package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
+
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.Pair;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.ListUtils;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.playground.utils.PoolUtils;
+import net.sf.samtools.SAMRecord;
+
+import java.util.List;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: Ghost
+ * Date: Oct 8, 2009
+ * Time: 9:44:35 AM
+ * To change this template use File | Settings | File Templates.
+ */
+public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
+    @Argument(fullName="lodThreshold", shortName="lod", doc="Threshold for log likelihood ratio to be called a SNP. Defaults to 3.0", required = false)
+    public double lodThresh = 3.0;
+
+    @Argument(fullName="minimumQScore", shortName="qm", doc="Use bases whose phred (Q) score meets or exceeds this number. Defaults to 0", required = false)
+    public byte minQ = 0;
+
+    @Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in the pool", required = true)
+    public int numIndividuals = 0;
+
+    @Argument(fullName="alleleFrequency", shortName="af", doc="Calculate power for all allele frequencies below this. Defaults to 4", required=false)
+    public int alleleFreq = 4;
+
+    @Argument(fullName="useMeanProb", doc="Use the mean probability as the \"average quality\" rather than median Q-score")
+    boolean useMean = false;
+
+    public void initialize() {
+        if ( alleleFreq < 1 ) {
+            String err = "Allele frequency (-af) must be greater than or equal to one.";
+            throw new StingException(err);
+        }
+    }
+
+    public Integer reduceInit() {
+        out.print(makeHeader());
+        return 0;
+    }
+
+    public Integer reduce(Integer mapint, Integer prevint) {
+        return 0;
+    }
+
+    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        String output = String.format("%s", context.getLocation().toString());
+
+        // threshold reads if necessary
+
+        if ( minQ > 0 ) {
+            Pair<List<SAMRecord>, List<Integer>> thresh = PoolUtils.thresholdReadsByQuality(context.getReads(),context.getOffsets(),minQ);
+            context = new AlignmentContext(context.getLocation(), thresh.getFirst(), thresh.getSecond());
+        }
+
+        // calculate powers and put into output string
+
+        for ( int i = 1; i <= alleleFreq; i ++ ) {
+            output = String.format("%s\t%f",output,calculatePowerAtFrequency(context,i));
+        }
+
+        // print the output string
+
+        out.printf("%s%n",output);
+
+        return 0;
+    }
+
+    public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) {
+        return theoreticalPower( context.numReads(), getMeanQ(context), alleles );
+    }
+
+    public byte getMeanQ( AlignmentContext context ) {
+        byte meanQ;
+        if ( useMean ) {
+            meanQ = QualityUtils.probToQual(expectedMatchRate(context));
+        } else {
+            meanQ = ListUtils.getQScoreMedian(context.getReads(),context.getOffsets());
+        }
+
+        return meanQ;
+    }
+
+    public double expectedMatchRate(AlignmentContext context) {
+        int nReads = context.numReads();
+        double matches = 0.0;
+        for ( int r = 0; r < nReads; r ++ ) {
+            matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]);
+        }
+
+        return matches/nReads;
+    }
+
+    public String makeHeader() {
+        // create the header
+        String header = "Power_at_1";
+        for ( int i = 1; i < alleleFreq; i ++ ) {
+            header = header + "\tPower_at_"+Integer.toString(i);
+        }
+
+        return String.format("%s%n", header);
+    }
+
+    public double theoreticalPower( int depth, byte q, int alleles ) {
+        double power;
+        if( depth <= 0 ) {
+            power = 0.0;
+        } else {
+            double p_error = QualityUtils.qualToErrorProb(q);
+            double snpProp = getSNPProportion(alleles);
+            double kterm_num = Math.log10( snpProp * (1 - p_error) + (1 - snpProp) * (p_error/3) );
+            double kterm_denom = Math.log10( p_error/3 );
+            double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) );
+            double dkterm_denom = Math.log10( 1 - p_error);
+
+            int kaccept = (int) Math.ceil( ( lodThresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) /
+                    ( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
+
+            // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
+            // we can optimize this by checking to see which sum is smaller
+
+            if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
+                power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
+            } else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
+                power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
+            }
+        }
+
+        return power;
+    }
+
+    private double getSNPProportion(int alleles) {
+        return ((double)alleles)/numIndividuals;
+    }
+
+
+}