From 69aa669928f69ba4c2988116634a458c3c148f9a Mon Sep 17 00:00:00 2001 From: depristo Date: Sun, 15 Mar 2009 22:42:24 +0000 Subject: [PATCH] git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@56 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/AlleleFrequencyWalker.java | 206 ++++++++++++++++++ .../gatk/walkers/BaseQualityHistoWalker.java | 59 +++++ .../sting/gatk/walkers/BasicLociWalker.java | 37 ++++ .../sting/gatk/walkers/BasicReadWalker.java | 31 +++ .../sting/gatk/walkers/CountLociWalker.java | 25 +++ .../sting/gatk/walkers/CountReadsWalker.java | 16 ++ .../gatk/walkers/DepthOfCoverageWalker.java | 26 +++ .../sting/gatk/walkers/LocusWalker.java | 30 +++ .../sting/gatk/walkers/NullWalker.java | 41 ++++ .../sting/gatk/walkers/PileupWalker.java | 84 +++++++ .../sting/gatk/walkers/PrintReadsWalker.java | 17 ++ .../sting/gatk/walkers/ReadWalker.java | 28 +++ .../gatk/walkers/SingleSampleGenotyper.java | 139 ++++++++++++ 13 files changed, 739 insertions(+) create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/BaseQualityHistoWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java create mode 100644 playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java create mode 100644 playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java create mode 100644 playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java new file mode 100755 index 000000000..792e2a61f --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java @@ -0,0 +1,206 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +public class AlleleFrequencyWalker extends BasicLociWalker { + + + + public Integer map(List rodData, char ref, LocusContext context) { + + // Convert context data into bases and 4-base quals + + // Set number of chromosomes, N, to 2 for now + int N = 2; + + // Convert bases to CharArray + int numReads = context.getReads().size(); //numReads(); + byte[] bases = new byte[numReads]; + double[][] quals = new double[numReads][4]; + int refnum = nuc2num[ref]; + + List reads = context.getReads(); + List offsets = context.getOffsets(); + for (int i =0; i < numReads; i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + + bases[i] = read.getReadBases()[offset]; + int callednum = nuc2num[bases[i]]; + quals[i][callednum] = 1 - Math.pow(10.0, (double)read.getBaseQualities()[offset] / -10.0); + + // Set all nonref qual scores to their share of the remaining probality not "used" by the reference base's qual + double nonref_quals = (1.0 - quals[i][callednum]) / 3; + for (int b=0; b<4; b++) + if (b != callednum) + quals[i][b] = nonref_quals; + } + + // Print quals for debugging + System.out.println("Base quality matrix"); + for (int b=0; b<4; b++) { + System.out.print(num2nuc[b]); + for (int i=0; i < numReads; i++){ + System.out.format(" %.4f", quals[i][b]); + } + System.out.println(); + } + + // Count bases + int[] base_counts = new int[4]; + for (byte b : bases) + base_counts[nuc2num[b]]++; + + // Find alternate allele - 2nd most frequent non-ref allele + // (maybe we should check for ties and eval both or check most common including quality scores) + int altnum = -1; // start with first base numerical identity (0,1,2,3) + double altcount = -1; // start with first base count + + for (int b=0; b<4; b++) { + if ((b != refnum) && (base_counts[b] > altcount)) { + altnum = b; altcount = base_counts[b]; + } + } + assert(altnum > 0); + + if (true) { //altcount > 2) { + System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total\n", context.getLocation(), num2nuc[refnum], num2nuc[altnum], + base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length); + + // Check if we really want to do this one + if (bases.length == 0) { + System.out.println("No reads at position; no call being made"); + }else{ + AlleleFrequencyEstimator(N, bases, quals, refnum, altnum); + } + } + System.out.println(); + + return 1; + } + + static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum) { + + // q = hypothetical %nonref + // qstar = true %nonref + // G = N, qstar, alt + // N = number of chromosomes + // alt = alternate hypothesis base (most frequent nonref base) + // ref = reference base + + double epsilon = 0; // 1e-2; + double qstar; + System.out.format("%4s | ", "q "); + for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) + System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior "); + System.out.println(); + + double highest_posterior = -1.0; + double highest_qstar = -1.0; + double highest_q = -1.0; + + double qstep = 0.01; + double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating + for (double q=0.0; q <= qend; q += qstep) { + System.out.format("%.2f | ", q); + for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) { + double pDq = P_D_q(bases, quals, q, refnum, altnum); + double pqG = P_q_G(bases, N, q, qstar); + double pG = P_G(N, qstar, altnum); + double posterior = pDq * pqG * pG; + System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior); + if (posterior > highest_posterior) { + highest_posterior = posterior; + highest_qstar = qstar; + highest_q = q; + } + } + System.out.println(); + } + System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior); + System.out.printf("q that maximimizes posterior: %.6f\n", highest_q); + System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar); + + // Print out the called bases + long numNonrefBases = Math.round(highest_q * N); + long numRefBases = N - numNonrefBases; + String genotype = repeat(num2nuc[refnum], numRefBases) + repeat(num2nuc[altnum], numNonrefBases); + System.out.printf("Maximal likelihood genotype: %s\n", genotype); + } + + static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) { + double p = 1.0; + + for (int i=0; i k; i--, j++) + t = t * i / j; + return t; + } + + public static String repeat(char letter, long count) { + String result = ""; + for (int i=0; i { + long[] qualCounts = new long[100]; + + public void initialize() { + for ( int i = 0; i < this.qualCounts.length; i++ ) { + this.qualCounts[i] = 0; + } + } + + public String walkerType() { return "ByRead"; } + + // Do we actually want to operate on the context? + public boolean filter(LocusContext context, SAMRecord read) { + return true; // We are keeping all the reads + } + + // Map over the org.broadinstitute.sting.gatk.LocusContext + public Integer map(LocusContext context, SAMRecord read) { + for ( byte qual : read.getBaseQualities() ) { + //System.out.println(qual); + this.qualCounts[qual]++; + } + //System.out.println(read.getReadName()); + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraversalDone() { + int lastNonZero = -1; + for ( int i = this.qualCounts.length-1; i >= 0; i-- ) { + if ( this.qualCounts[i] > 0 ) { + lastNonZero = i; + break; + } + } + + for ( int i = 0; i < lastNonZero+1; i++ ) { + System.out.printf("%3d : %10d%n", i, this.qualCounts[i]); + } + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java new file mode 100755 index 000000000..3a42d18fe --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicLociWalker.java @@ -0,0 +1,37 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class BasicLociWalker implements LocusWalker { + public void initialize() { + ; + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusContext context) { + return true; // We are keeping all the reads + } + + public void onTraversalDone() { + ; + } + + // These three capabilities must be overidden + public abstract MapType map(List rodData, char ref, LocusContext context); + public abstract ReduceType reduceInit(); + public abstract ReduceType reduce(MapType value, ReduceType sum); + +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java new file mode 100755 index 000000000..209aa6773 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/BasicReadWalker.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 2:52:28 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class BasicReadWalker implements ReadWalker { + public void initialize() { } + public String walkerType() { return "ByRead"; } + + public boolean filter(LocusContext context, SAMRecord read) { + // We are keeping all the reads + return true; + } + + public void onTraversalDone() { + ; + } + + // Three basic abstract function that *must* be overridden + public abstract MapType map(LocusContext context, SAMRecord read); + public abstract ReduceType reduceInit(); + public abstract ReduceType reduce(MapType value, ReduceType sum); +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java new file mode 100755 index 000000000..bf4c758d1 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountLociWalker.java @@ -0,0 +1,25 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class CountLociWalker extends BasicLociWalker { + public Integer map(List rodData, char ref, LocusContext context) { + return 1; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java new file mode 100755 index 000000000..595349688 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java @@ -0,0 +1,16 @@ +package org.broadinstitute.sting.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; + +public class CountReadsWalker extends BasicReadWalker { + public Integer map(LocusContext context, SAMRecord read) { + return 1; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java new file mode 100755 index 000000000..1253f245c --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/DepthOfCoverageWalker.java @@ -0,0 +1,26 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class DepthOfCoverageWalker extends BasicLociWalker { + public Integer map(List rodData, char ref, LocusContext context) { + System.out.printf("%s: %d%n", context.getLocation(), context.getReads().size() ); + return 0; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} \ No newline at end of file diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java new file mode 100755 index 000000000..2f563f752 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.LocusContext; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 2:52:28 PM + * To change this template use File | Settings | File Templates. + */ +public interface LocusWalker { + void initialize(); + public String walkerType(); + + // Do we actually want to operate on the context? + boolean filter(List rodData, char ref, LocusContext context); + + // Map over the org.broadinstitute.sting.gatk.LocusContext + MapType map(List rodData, char ref, LocusContext context); + + // Given result of map function + ReduceType reduceInit(); + ReduceType reduce(MapType value, ReduceType sum); + + void onTraversalDone(); +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java new file mode 100644 index 000000000..5b0347f4b --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/NullWalker.java @@ -0,0 +1,41 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; + +import java.util.List; + +// Null traversal. For ATK performance measuring. +// j.maguire 3-7-2009 + +public class NullWalker implements LocusWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusContext context) { + return true; // We are keeping all the reads + } + + // Map over the org.broadinstitute.sting.gatk.LocusContext + public Integer map(List rodData, char ref, LocusContext context) + { + return 1; + } + + // Given result of map function + public Integer reduceInit() + { + return 0; + } + public Integer reduce(Integer value, Integer sum) + { + return 0; + } + + public void onTraversalDone() { + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java new file mode 100644 index 000000000..a895849c6 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -0,0 +1,84 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 3:22:14 PM + * To change this template use File | Settings | File Templates. + */ +public class PileupWalker extends BasicLociWalker { + public void initialize() { + } + + public String walkerType() { return "ByLocus"; } + + // Do we actually want to operate on the context? + public boolean filter(List rodData, char ref, LocusContext context) { + return true; // We are keeping all the reads + } + + // Map over the org.broadinstitute.sting.gatk.LocusContext + public Integer map(List rodData, char ref, LocusContext context) { + //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); + //for ( SAMRecord read : context.getReads() ) { + // System.out.println(" -> " + read.getReadName()); + //} + + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + + //if ( offset >= read.getReadString().length() ) + // System.out.printf(" [%2d] [%s] %s%n", offset, read.format(), read.getReadString()); + + bases += read.getReadString().charAt(offset); + quals += read.getBaseQualityString().charAt(offset); + //offsetString += i; + //System.out.printf(" [%2d] [%s] %s%n", offset, read.getReadString().charAt(offset), read.getReadString()); + } + + String rodString = ""; + for ( ReferenceOrderedDatum datum : rodData ) { + if ( datum != null ) { + if ( datum instanceof rodDbSNP) { + rodDbSNP dbsnp = (rodDbSNP)datum; + //System.out.printf(" DBSNP %s on %s => %s%n", dbsnp.toSimpleString(), dbsnp.strand, Utils.join("/", dbsnp.getAllelesFWD())); + rodString += dbsnp.toMediumString(); + } + else { + rodString += datum.toSimpleString(); + } + } + } + if ( rodString != "" ) + rodString = "[ROD: " + rodString + "]"; + + if ( context.getLocation().getStart() % 1 == 0 ) { + System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString); + } + + //for ( int offset : context.getOffsets() ) { + // System.out.println(" -> " + read.getReadName()); + //} + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java new file mode 100755 index 000000000..4aff75943 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; + +public class PrintReadsWalker extends BasicReadWalker { + public Integer map(LocusContext context, SAMRecord read) { + System.out.println(read.format()); + return 1; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java new file mode 100755 index 000000000..14cecfa96 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Feb 22, 2009 + * Time: 2:52:28 PM + * To change this template use File | Settings | File Templates. + */ +public interface ReadWalker { + void initialize(); + public String walkerType(); + + // Do we actually want to operate on the context? + boolean filter(LocusContext context, SAMRecord read); + + // Map over the org.broadinstitute.sting.gatk.LocusContext + MapType map(LocusContext context, SAMRecord read); + + // Given result of map function + ReduceType reduceInit(); + ReduceType reduce(MapType value, ReduceType sum); + + void onTraversalDone(); +} diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java new file mode 100644 index 000000000..3b483f6c1 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/SingleSampleGenotyper.java @@ -0,0 +1,139 @@ +package org.broadinstitute.sting.gatk.walkers; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.utils.Utils; +import net.sf.samtools.SAMRecord; + +import java.util.List; + +// Draft single sample genotyper +// j.maguire 3-7-2009 + +public class SingleSampleGenotyper extends BasicLociWalker { + public boolean filter(List rodData, char ref, LocusContext context) { + return true; // We are keeping all the reads + } + + protected class GenotypeLikelihoods + { + public double[] likelihoods; + public String[] genotypes; + + GenotypeLikelihoods() + { + likelihoods = new double[10]; + genotypes = new String[10]; + + genotypes[0] = "AA"; + genotypes[1] = "AC"; + genotypes[2] = "AG"; + genotypes[3] = "AT"; + genotypes[4] = "CC"; + genotypes[5] = "CG"; + genotypes[6] = "CT"; + genotypes[7] = "GG"; + genotypes[8] = "GT"; + genotypes[9] = "TT"; + } + + void add(char ref, char read, byte qual) + { + double p_error = Math.pow(10.0, (double)qual / -10); + for (int i = 0; i < genotypes.length; i++) + { + likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error); + } + } + + double AlleleLikelihood(char ref, char read, String genotype, double p_error) + { + char h1 = genotype.charAt(0); + char h2 = genotype.charAt(1); + + double p_base; + + if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); } + else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); } + else { p_base = Math.log10(p_error); } + + return p_base; + } + + public String toString() + { + Integer[] permutation = Utils.SortPermutation(likelihoods); + String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation); + double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation); + + String s = ""; + for (int i = sorted_genotypes.length-1; i >= 0; i--) + { + if (i != sorted_genotypes.length-1) { s = s + " "; } + s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i]; + } + return s; + } + + } + + // Map over the org.broadinstitute.sting.gatk.LocusContext + public Integer map(List rodData, char ref, LocusContext context) { + //System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size()); + //for ( SAMRecord read : context.getReads() ) { + // System.out.println(" -> " + read.getReadName()); + //} + + List reads = context.getReads(); + List offsets = context.getOffsets(); + String bases = ""; + String quals = ""; + //String offsetString = ""; + + // Look up hapmap and dbsnp priors + String rodString = ""; + for ( ReferenceOrderedDatum datum : rodData ) + { + if ( datum != null ) + { + if ( datum instanceof rodDbSNP) + { + rodDbSNP dbsnp = (rodDbSNP)datum; + rodString += dbsnp.toMediumString(); + } + else + { + rodString += datum.toSimpleString(); + } + } + } + if ( rodString != "" ) + rodString = "[ROD: " + rodString + "]"; + + // Accumulate genotype likelihoods + GenotypeLikelihoods G = new GenotypeLikelihoods(); + for ( int i = 0; i < reads.size(); i++ ) + { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + bases += read.getReadString().charAt(offset); + quals += read.getBaseQualityString().charAt(offset); + + G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]); + } + + if ( context.getLocation().getStart() % 1 == 0 ) { + //System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString); + System.out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString); + } + + return 1; + } + + // Given result of map function + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } +}