diff --git a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java index 7001e7f07..b0f71b91e 100644 --- a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java +++ b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java @@ -37,6 +37,7 @@ public class AnalysisTK extends CommandLineProgram { addModule("CountReads", new CountReadsWalker()); addModule("PrintReads", new PrintReadsWalker()); addModule("Base_Quality_Histogram", new BaseQualityHistoWalker()); + addModule("Genotype", new GenotypeWalker()); addModule("SingleSampleGenotyper", new SingleSampleGenotyper()); addModule("Null", new NullWalker()); } diff --git a/java/src/edu/mit/broad/sting/atk/GenotypeEvidence.java b/java/src/edu/mit/broad/sting/atk/GenotypeEvidence.java new file mode 100755 index 000000000..60790a008 --- /dev/null +++ b/java/src/edu/mit/broad/sting/atk/GenotypeEvidence.java @@ -0,0 +1,78 @@ +package edu.mit.broad.sting.atk; + +/** + * Created by IntelliJ IDEA. + * User: andrewk + * Date: Mar 9, 2009 + * Time: 3:34:08 PM + * To change this template use File | Settings | File Templates. + */ +public class GenotypeEvidence { + + int[] nuc2num = new int[128]; + int[] nucs = new int[4]; + int a = nucs[0]; + int c = nucs[1]; + int t = nucs[2]; + int g = nucs[3]; + float[] nuc_pcnt = new float[4]; + char ref; + public float q; // % non-reference alleles + public int refbases; + public int allbases; + + public GenotypeEvidence(String bases, char ref){ + this.ref = ref; + nuc2num['A'] = 0; + nuc2num['C'] = 1; + nuc2num['T'] = 2; + nuc2num['G'] = 3; + nuc2num['a'] = 0; + nuc2num['c'] = 1; + nuc2num['t'] = 2; + nuc2num['g'] = 3; + + for (char b : bases.toCharArray()) { + nucs[nuc2num[b]] += 1; + /*switch (b) { + case 'A': nucs[0] += 1; break; + case 'C': nucs[1] += 1; break; + case 'T': nucs[2] += 1; break; + case 'G': nucs[3] += 1; break; + } */ + } + + // Calculate q = ref. bases / nonref. bases + refbases = nucs[nuc2num[ref]]; + allbases = bases.length(); + q = 1 - ((float)refbases / allbases); + + /*for (int i=0; i<4; i++) { + nuc_pcnt[i] = (float)nucs[i] / len; + //if + }*/ + } + + + public boolean SigNonref(float cutoff_fraction) { + /* for (char nuc : nucs) { + + }*/ + + + return true; + } + + public void print() { + + System.out.format("A %2d | ", nucs[0]); + System.out.format("C %2d | ", nucs[1]); + System.out.format("T %2d | ", nucs[2]); + System.out.format("G %2d | ", nucs[3]); + System.out.format("Ref %s | ", ref); + + } + + + +} diff --git a/java/src/edu/mit/broad/sting/atk/LocusIterator.java b/java/src/edu/mit/broad/sting/atk/LocusIterator.java index ff306ab03..da7f73e07 100755 --- a/java/src/edu/mit/broad/sting/atk/LocusIterator.java +++ b/java/src/edu/mit/broad/sting/atk/LocusIterator.java @@ -33,6 +33,7 @@ public class LocusIterator implements Iterable, CloseableIterator public List getReads() { return reads; } public List getOffsets() { return offsets; } + public int numReads() { return reads.size(); } // ----------------------------------------------------------------------------------------------------------------- // diff --git a/java/src/edu/mit/broad/sting/atk/modules/GenotypeWalker.java b/java/src/edu/mit/broad/sting/atk/modules/GenotypeWalker.java new file mode 100755 index 000000000..b15960380 --- /dev/null +++ b/java/src/edu/mit/broad/sting/atk/modules/GenotypeWalker.java @@ -0,0 +1,88 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.atk.GenotypeEvidence; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; +import net.sf.samtools.SAMRecord; + + +import java.util.List; +import static java.lang.System.currentTimeMillis; + +public class GenotypeWalker extends BasicLociWalker { + public Integer map(List rodData, char ref, LocusIterator context) { + //char[] = new char(26); + long start_tm = currentTimeMillis(); + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + + //if ( offset >= read.getReadString().length() ) + // System.out.printf(" [%2d] [%s] %s%n", offset, read.format(), read.getReadString()); + + bases += read.getReadString().charAt(offset); + //quals += read.getBaseQualityString().charAt(offset); + //offsetString += i; + //System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString()); + } + + GenotypeEvidence all = new GenotypeEvidence(bases, ref); + + // P(q|G) - prob of nonref mixture given the genotype + float qobs = all.q; // observed percent of non-ref bases + double G; // % non-ref bases in observed + if (qobs >= 0.1) { + all.print(); + System.out.format("q %.2f | ", all.q); + System.out.format("%s | ", context.getLocation()); + System.out.format("Total %4d | ", context.numReads()); + System.out.println(); + for (int q = 0; q < all.allbases; q ++) { + for (G = 0.01; G <= 1.0; G += 0.49) { // iterate over: ref (0%), het (50%) and hom (100%) nonref bases observed + //double pqG = binomialProb(all.allbases - all.refbases, all.allbases, G); + double pqG = binomialProb(q, all.allbases, G); + //all.print(); + System.out.format("P(q|G) %.3f | ", pqG); + } + System.out.println(); + } + long stop_tm = currentTimeMillis(); + System.out.format("%.3fs\n", (float)(stop_tm - start_tm) / 1000); + } + return 1; + } + + static double binomialProb(int k, int n, double p) { + // k - numebr of successes + // n - number of Bernoulli trials + // p - probability of success + + return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k); + } + + static int nchoosek(int n, int k) { + int t = 1; + + int m = n - k; + if (k < m) { + k = m; + } + + for (int i = n, j = 1; i > k; i--, j++) { + t = t * i / j; + } + + return t; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} \ No newline at end of file