diff --git a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java index 4751385bc..7001e7f07 100644 --- a/java/src/edu/mit/broad/sting/atk/AnalysisTK.java +++ b/java/src/edu/mit/broad/sting/atk/AnalysisTK.java @@ -37,6 +37,8 @@ public class AnalysisTK extends CommandLineProgram { addModule("CountReads", new CountReadsWalker()); addModule("PrintReads", new PrintReadsWalker()); addModule("Base_Quality_Histogram", new BaseQualityHistoWalker()); + addModule("SingleSampleGenotyper", new SingleSampleGenotyper()); + addModule("Null", new NullWalker()); } private TraversalEngine engine = null; diff --git a/java/src/edu/mit/broad/sting/atk/modules/NullWalker.java b/java/src/edu/mit/broad/sting/atk/modules/NullWalker.java new file mode 100644 index 000000000..a10e18136 --- /dev/null +++ b/java/src/edu/mit/broad/sting/atk/modules/NullWalker.java @@ -0,0 +1,44 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sting.atk.LocusWalker; +import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; +import edu.mit.broad.sting.utils.rodDbSNP; +import edu.mit.broad.sting.utils.Utils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +// Null traversal. For ATK performance measuring. +// j.maguire 3-7-2009 + +public class NullWalker implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + // Map over the edu.mit.broad.sting.atk.LocusContext + public Integer map(List rodData, char ref, LocusIterator context) + { + return 1; + } + + // Given result of map function + public Integer reduceInit() + { + return 0; + } + public Integer reduce(Integer value, Integer sum) + { + return 0; + } + + public void onTraveralDone() { + } +} diff --git a/java/src/edu/mit/broad/sting/atk/modules/SingleSampleGenotyper.java b/java/src/edu/mit/broad/sting/atk/modules/SingleSampleGenotyper.java new file mode 100644 index 000000000..9ca7e87a3 --- /dev/null +++ b/java/src/edu/mit/broad/sting/atk/modules/SingleSampleGenotyper.java @@ -0,0 +1,149 @@ +package edu.mit.broad.sting.atk.modules; + +import edu.mit.broad.sting.atk.LocusWalker; +import edu.mit.broad.sting.atk.LocusIterator; +import edu.mit.broad.sting.utils.ReferenceOrderedDatum; +import edu.mit.broad.sting.utils.rodDbSNP; +import edu.mit.broad.sting.utils.Utils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +// Draft single sample genotyper +// j.maguire 3-7-2009 + +public class SingleSampleGenotyper implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusIterator context) { + return true; // We are keeping all the reads + } + + protected class GenotypeLikelihoods + { + public double[] likelihoods; + public String[] genotypes; + + GenotypeLikelihoods() + { + likelihoods = new double[10]; + genotypes = new String[10]; + + genotypes[0] = "AA"; + genotypes[1] = "AC"; + genotypes[2] = "AG"; + genotypes[3] = "AT"; + genotypes[4] = "CC"; + genotypes[5] = "CG"; + genotypes[6] = "CT"; + genotypes[7] = "GG"; + genotypes[8] = "GT"; + genotypes[9] = "TT"; + } + + void add(char ref, char read, byte qual) + { + double p_error = Math.pow(10.0, (double)qual / -10); + for (int i = 0; i < genotypes.length; i++) + { + likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error); + } + } + + double AlleleLikelihood(char ref, char read, String genotype, double p_error) + { + char h1 = genotype.charAt(0); + char h2 = genotype.charAt(1); + + double p_base; + + if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); } + else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); } + else { p_base = Math.log10(p_error); } + + return p_base; + } + + public String toString() + { + Integer[] permutation = Utils.SortPermutation(likelihoods); + String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation); + double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation); + + String s = ""; + for (int i = sorted_genotypes.length-1; i >= 0; i--) + { + if (i != sorted_genotypes.length-1) { s = s + " "; } + s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i]; + } + return s; + } + + } + + // Map over the edu.mit.broad.sting.atk.LocusContext + public Integer map(List rodData, char ref, LocusIterator context) { + //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); + //for ( SAMRecord read : context.getReads() ) { + // System.out.println(" -> " + read.getReadName()); + //} + + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + + // Look up hapmap and dbsnp priors + String rodString = ""; + for ( ReferenceOrderedDatum datum : rodData ) + { + if ( datum != null ) + { + if ( datum instanceof rodDbSNP) + { + rodDbSNP dbsnp = (rodDbSNP)datum; + rodString += dbsnp.toMediumString(); + } + else + { + rodString += datum.toSimpleString(); + } + } + } + if ( rodString != "" ) + rodString = "[ROD: " + rodString + "]"; + + // Accumulate genotype likelihoods + GenotypeLikelihoods G = new GenotypeLikelihoods(); + for ( int i = 0; i < reads.size(); i++ ) + { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + bases += read.getReadString().charAt(offset); + quals += read.getBaseQualityString().charAt(offset); + + G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); + } + + if ( context.getLocation().getStart() % 1 == 0 ) { + //System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString); + System.out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString); + } + + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraveralDone() { + } +} diff --git a/java/src/edu/mit/broad/sting/utils/Utils.java b/java/src/edu/mit/broad/sting/utils/Utils.java index da2637f59..d02d47843 100755 --- a/java/src/edu/mit/broad/sting/utils/Utils.java +++ b/java/src/edu/mit/broad/sting/utils/Utils.java @@ -108,4 +108,93 @@ public class Utils { GenomeLoc.setContigOrdering(refContigOrdering); } + + // Java Generics can't do primitive types, so I had to do this the simplistic way + + public static Integer[] SortPermutation(final int[] A) + { + class comparator implements Comparator + { + public int compare(Object a, Object b) + { + if (A[(Integer)a] < A[(Integer)b]) { return -1; } + if (A[(Integer)a] == A[(Integer)b]) { return 0; } + if (A[(Integer)a] > A[(Integer)b]) { return 1; } + return 0; + } + } + Integer[] permutation = new Integer[A.length]; + for (int i = 0; i < A.length; i++) + { + permutation[i] = i; + } + Arrays.sort(permutation, new comparator()); + return permutation; + } + + public static Integer[] SortPermutation(final double[] A) + { + class comparator implements Comparator + { + public int compare(Object a, Object b) + { + if (A[(Integer)a] < A[(Integer)b]) { return -1; } + if (A[(Integer)a] == A[(Integer)b]) { return 0; } + if (A[(Integer)a] > A[(Integer)b]) { return 1; } + return 0; + } + } + Integer[] permutation = new Integer[A.length]; + for (int i = 0; i < A.length; i++) + { + permutation[i] = i; + } + Arrays.sort(permutation, new comparator()); + return permutation; + } + + public static int[] PermuteArray(int[] array, Integer[] permutation) + { + int[] output = new int[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static double[] PermuteArray(double[] array, Integer[] permutation) + { + double[] output = new double[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static Object[] PermuteArray(Object[] array, Integer[] permutation) + { + Object[] output = new Object[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + + public static String[] PermuteArray(String[] array, Integer[] permutation) + { + String[] output = new String[array.length]; + for (int i = 0; i < output.length; i++) + { + output[i] = array[permutation[i]]; + } + return output; + } + } + + + +