diff --git a/playground/java/src/org/broadinstitute/sting/atk/GenotypeEvidence.java b/playground/java/src/org/broadinstitute/sting/atk/GenotypeEvidence.java new file mode 100755 index 000000000..918123acf --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/atk/GenotypeEvidence.java @@ -0,0 +1,78 @@ +package org.broadinstitute.sting.atk; + +/** + * Created by IntelliJ IDEA. + * User: andrewk + * Date: Mar 9, 2009 + * Time: 3:34:08 PM + * To change this template use File | Settings | File Templates. + */ +public class GenotypeEvidence { + + int[] nuc2num = new int[128]; + int[] nucs = new int[4]; + int a = nucs[0]; + int c = nucs[1]; + int t = nucs[2]; + int g = nucs[3]; + float[] nuc_pcnt = new float[4]; + char ref; + public float q; // % non-reference alleles + public int refbases; + public int allbases; + + public GenotypeEvidence(String bases, char ref){ + this.ref = ref; + nuc2num['A'] = 0; + nuc2num['C'] = 1; + nuc2num['T'] = 2; + nuc2num['G'] = 3; + nuc2num['a'] = 0; + nuc2num['c'] = 1; + nuc2num['t'] = 2; + nuc2num['g'] = 3; + + for (char b : bases.toCharArray()) { + nucs[nuc2num[b]] += 1; + /*switch (b) { + case 'A': nucs[0] += 1; break; + case 'C': nucs[1] += 1; break; + case 'T': nucs[2] += 1; break; + case 'G': nucs[3] += 1; break; + } */ + } + + // Calculate q = ref. bases / nonref. bases + refbases = nucs[nuc2num[ref]]; + allbases = bases.length(); + q = 1 - ((float)refbases / allbases); + + /*for (int i=0; i<4; i++) { + nuc_pcnt[i] = (float)nucs[i] / len; + //if + }*/ + } + + + public boolean SigNonref(float cutoff_fraction) { + /* for (char nuc : nucs) { + + }*/ + + + return true; + } + + public void print() { + + System.out.format("A %2d | ", nucs[0]); + System.out.format("C %2d | ", nucs[1]); + System.out.format("T %2d | ", nucs[2]); + System.out.format("G %2d | ", nucs[3]); + System.out.format("Ref %s | ", ref); + + } + + + +} diff --git a/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java b/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java index 84bb9bb08..433b7c1c4 100755 --- a/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java +++ b/playground/java/src/org/broadinstitute/sting/atk/LocusIterator.java @@ -33,6 +33,7 @@ public class LocusIterator implements Iterable, CloseableIterator public List getReads() { return reads; } public List getOffsets() { return offsets; } + public int numReads() { return reads.size(); } // ----------------------------------------------------------------------------------------------------------------- // diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/GenotypeWalker.java b/playground/java/src/org/broadinstitute/sting/atk/modules/GenotypeWalker.java new file mode 100755 index 000000000..5bff04810 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/atk/modules/GenotypeWalker.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.atk.modules; + +import org.broadinstitute.sting.atk.LocusIterator; +import org.broadinstitute.sting.atk.GenotypeEvidence; +import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import net.sf.samtools.SAMRecord; + + +import java.util.List; +import static java.lang.System.currentTimeMillis; + +public class GenotypeWalker extends BasicLociWalker { + public Integer map(List rodData, char ref, LocusIterator context) { + //char[] = new char(26); + long start_tm = currentTimeMillis(); + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + + //if ( offset >= read.getReadString().length() ) + // System.out.printf(" [%2d] [%s] %s%n", offset, read.format(), read.getReadString()); + + bases += read.getReadString().charAt(offset); + //quals += read.getBaseQualityString().charAt(offset); + //offsetString += i; + //System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString()); + } + + GenotypeEvidence all = new GenotypeEvidence(bases, ref); + + // P(q|G) - prob of nonref mixture given the genotype + float qobs = all.q; // observed percent of non-ref bases + double G; // % non-ref bases in observed + if (qobs >= 0.1) { + all.print(); + System.out.format("q %.2f | ", all.q); + System.out.format("%s | ", context.getLocation()); + System.out.format("Total %4d | ", context.numReads()); + System.out.println(); + for (int q = 0; q < all.allbases; q ++) { + for (G = 0.01; G <= 1.0; G += 0.49) { // iterate over: ref (0%), het (50%) and hom (100%) nonref bases observed + //double pqG = binomialProb(all.allbases - all.refbases, all.allbases, G); + double pqG = binomialProb(q, all.allbases, G); + //all.print(); + System.out.format("P(q|G) %.3f | ", pqG); + } + System.out.println(); + } + long stop_tm = currentTimeMillis(); + System.out.format("%.3fs\n", (float)(stop_tm - start_tm) / 1000); + } + return 1; + } + + static double binomialProb(int k, int n, double p) { + // k - numebr of successes + // n - number of Bernoulli trials + // p - probability of success + + return (double)nchoosek(n, k) * Math.pow(p, k) * Math.pow(1-p, n-k); + } + + static int nchoosek(int n, int k) { + int t = 1; + + int m = n - k; + if (k < m) { + k = m; + } + + for (int i = n, j = 1; i > k; i--, j++) { + t = t * i / j; + } + + return t; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java b/playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java new file mode 100644 index 000000000..d6d3f91c8 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/atk/modules/NullWalker.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.atk.modules; + +import org.broadinstitute.sting.atk.LocusWalker; +import org.broadinstitute.sting.atk.LocusIterator; +import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.rodDbSNP; +import org.broadinstitute.sting.utils.Utils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +// Null traversal. For ATK performance measuring. +// j.maguire 3-7-2009 + +public class NullWalker implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + // Map over the org.broadinstitute.sting.atk.LocusContext + public Integer map(List rodData, char ref, LocusIterator context) + { + return 1; + } + + // Given result of map function + public Integer reduceInit() + { + return 0; + } + public Integer reduce(Integer value, Integer sum) + { + return 0; + } + + public void onTraveralDone() { + } +} diff --git a/playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java b/playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java new file mode 100644 index 000000000..5363f417c --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/atk/modules/SingleSampleGenotyper.java @@ -0,0 +1,149 @@ +package org.broadinstitute.sting.atk.modules; + +import org.broadinstitute.sting.atk.LocusWalker; +import org.broadinstitute.sting.atk.LocusIterator; +import org.broadinstitute.sting.utils.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.rodDbSNP; +import org.broadinstitute.sting.utils.Utils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +// Draft single sample genotyper +// j.maguire 3-7-2009 + +public class SingleSampleGenotyper implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + protected class GenotypeLikelihoods + { + public double[] likelihoods; + public String[] genotypes; + + GenotypeLikelihoods() + { + likelihoods = new double[10]; + genotypes = new String[10]; + + genotypes[0] = "AA"; + genotypes[1] = "AC"; + genotypes[2] = "AG"; + genotypes[3] = "AT"; + genotypes[4] = "CC"; + genotypes[5] = "CG"; + genotypes[6] = "CT"; + genotypes[7] = "GG"; + genotypes[8] = "GT"; + genotypes[9] = "TT"; + } + + void add(char ref, char read, byte qual) + { + double p_error = Math.pow(10.0, (double)qual / -10); + for (int i = 0; i < genotypes.length; i++) + { + likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error); + } + } + + double AlleleLikelihood(char ref, char read, String genotype, double p_error) + { + char h1 = genotype.charAt(0); + char h2 = genotype.charAt(1); + + double p_base; + + if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); } + else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); } + else { p_base = Math.log10(p_error); } + + return p_base; + } + + public String toString() + { + Integer[] permutation = Utils.SortPermutation(likelihoods); + String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation); + double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation); + + String s = ""; + for (int i = sorted_genotypes.length-1; i >= 0; i--) + { + if (i != sorted_genotypes.length-1) { s = s + " "; } + s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i]; + } + return s; + } + + } + + // Map over the org.broadinstitute.sting.atk.LocusContext + public Integer map(List rodData, char ref, LocusIterator context) { + //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); + //for ( SAMRecord read : context.getReads() ) { + // System.out.println(" -> " + read.getReadName()); + //} + + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + + // Look up hapmap and dbsnp priors + String rodString = ""; + for ( ReferenceOrderedDatum datum : rodData ) + { + if ( datum != null ) + { + if ( datum instanceof rodDbSNP) + { + rodDbSNP dbsnp = (rodDbSNP)datum; + rodString += dbsnp.toMediumString(); + } + else + { + rodString += datum.toSimpleString(); + } + } + } + if ( rodString != "" ) + rodString = "[ROD: " + rodString + "]"; + + // Accumulate genotype likelihoods + GenotypeLikelihoods G = new GenotypeLikelihoods(); + for ( int i = 0; i < reads.size(); i++ ) + { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + bases += read.getReadString().charAt(offset); + quals += read.getBaseQualityString().charAt(offset); + + G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); + } + + if ( context.getLocation().getStart() % 1 == 0 ) { + //System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString); + System.out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString); + } + + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraveralDone() { + } +} diff --git a/playground/java/src/org/broadinstitute/sting/utils/Utils.java b/playground/java/src/org/broadinstitute/sting/utils/Utils.java index af59a0dcc..52ba6d81b 100755 --- a/playground/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/playground/java/src/org/broadinstitute/sting/utils/Utils.java @@ -108,4 +108,93 @@ public class Utils { GenomeLoc.setContigOrdering(refContigOrdering); } + + // Java Generics can't do primitive types, so I had to do this the simplistic way + + public static Integer[] SortPermutation(final int[] A) + { + class comparator implements Comparator + { + public int compare(Object a, Object b) + { + if (A[(Integer)a] < A[(Integer)b]) { return -1; } + if (A[(Integer)a] == A[(Integer)b]) { return 0; } + if (A[(Integer)a] > A[(Integer)b]) { return 1; } + return 0; + } + } + Integer[] permutation = new Integer[A.length]; + for (int i = 0; i < A.length; i++) + { + permutation[i] = i; + } + Arrays.sort(permutation, new comparator()); + return permutation; + } + + public static Integer[] SortPermutation(final double[] A) + { + class comparator implements Comparator + { + public int compare(Object a, Object b) + { + if (A[(Integer)a] < A[(Integer)b]) { return -1; } + if (A[(Integer)a] == A[(Integer)b]) { return 0; } + if (A[(Integer)a] > A[(Integer)b]) { return 1; } + return 0; + } + } + Integer[] permutation = new Integer[A.length]; + for (int i = 0; i < A.length; i++) + { + permutation[i] = i; + } + Arrays.sort(permutation, new comparator()); + return permutation; + } + + public static int[] PermuteArray(int[] array, Integer[] permutation) + { + int[] output = new int[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static double[] PermuteArray(double[] array, Integer[] permutation) + { + double[] output = new double[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static Object[] PermuteArray(Object[] array, Integer[] permutation) + { + Object[] output = new Object[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static String[] PermuteArray(String[] array, Integer[] permutation) + { + String[] output = new String[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + } + + + +