Added NullWalker module (do nothing).
Added SingleSampleGenotyper module (old-school single sample genotyping). Added Utils.SortPermutation (return the permutaton that would sort the input array). Added Utils.PermuteArray (apply a permutation to an array). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@29 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6b8650b0c0
commit
716af47dd5
|
|
@ -37,6 +37,8 @@ public class AnalysisTK extends CommandLineProgram {
|
|||
addModule("CountReads", new CountReadsWalker());
|
||||
addModule("PrintReads", new PrintReadsWalker());
|
||||
addModule("Base_Quality_Histogram", new BaseQualityHistoWalker());
|
||||
addModule("SingleSampleGenotyper", new SingleSampleGenotyper());
|
||||
addModule("Null", new NullWalker());
|
||||
}
|
||||
|
||||
private TraversalEngine engine = null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
package edu.mit.broad.sting.atk.modules;
|
||||
|
||||
import edu.mit.broad.sting.atk.LocusWalker;
|
||||
import edu.mit.broad.sting.atk.LocusIterator;
|
||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
||||
import edu.mit.broad.sting.utils.Utils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// Null traversal. For ATK performance measuring.
|
||||
// j.maguire 3-7-2009
|
||||
|
||||
public class NullWalker implements LocusWalker<Integer, Integer> {
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
public String walkerType() { return "ByLocus"; }
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
public Integer reduce(Integer value, Integer sum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void onTraveralDone() {
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
package edu.mit.broad.sting.atk.modules;
|
||||
|
||||
import edu.mit.broad.sting.atk.LocusWalker;
|
||||
import edu.mit.broad.sting.atk.LocusIterator;
|
||||
import edu.mit.broad.sting.utils.ReferenceOrderedDatum;
|
||||
import edu.mit.broad.sting.utils.rodDbSNP;
|
||||
import edu.mit.broad.sting.utils.Utils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// Draft single sample genotyper
|
||||
// j.maguire 3-7-2009
|
||||
|
||||
public class SingleSampleGenotyper implements LocusWalker<Integer, Integer> {
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
public String walkerType() { return "ByLocus"; }
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
protected class GenotypeLikelihoods
|
||||
{
|
||||
public double[] likelihoods;
|
||||
public String[] genotypes;
|
||||
|
||||
GenotypeLikelihoods()
|
||||
{
|
||||
likelihoods = new double[10];
|
||||
genotypes = new String[10];
|
||||
|
||||
genotypes[0] = "AA";
|
||||
genotypes[1] = "AC";
|
||||
genotypes[2] = "AG";
|
||||
genotypes[3] = "AT";
|
||||
genotypes[4] = "CC";
|
||||
genotypes[5] = "CG";
|
||||
genotypes[6] = "CT";
|
||||
genotypes[7] = "GG";
|
||||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
}
|
||||
|
||||
void add(char ref, char read, byte qual)
|
||||
{
|
||||
double p_error = Math.pow(10.0, (double)qual / -10);
|
||||
for (int i = 0; i < genotypes.length; i++)
|
||||
{
|
||||
likelihoods[i] += AlleleLikelihood(ref, read, genotypes[i], p_error);
|
||||
}
|
||||
}
|
||||
|
||||
double AlleleLikelihood(char ref, char read, String genotype, double p_error)
|
||||
{
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read)) { p_base = Math.log10(1-p_error); }
|
||||
else if ((h1 != h2) && (h1 == read) || (h2 == read)) { p_base = Math.log10(0.5 - (p_error/2.0)); }
|
||||
else { p_base = Math.log10(p_error); }
|
||||
|
||||
return p_base;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
||||
String[] sorted_genotypes = Utils.PermuteArray(genotypes, permutation);
|
||||
double[] sorted_likelihoods = Utils.PermuteArray(likelihoods, permutation);
|
||||
|
||||
String s = "";
|
||||
for (int i = sorted_genotypes.length-1; i >= 0; i--)
|
||||
{
|
||||
if (i != sorted_genotypes.length-1) { s = s + " "; }
|
||||
s = s + sorted_genotypes[i] + ":" + sorted_likelihoods[i];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Map over the edu.mit.broad.sting.atk.LocusContext
|
||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusIterator context) {
|
||||
//System.out.printf("Reads %s:%d %d%n", context.getContig(), context.getPosition(), context.getReads().size());
|
||||
//for ( SAMRecord read : context.getReads() ) {
|
||||
// System.out.println(" -> " + read.getReadName());
|
||||
//}
|
||||
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
String bases = "";
|
||||
String quals = "";
|
||||
//String offsetString = "";
|
||||
|
||||
// Look up hapmap and dbsnp priors
|
||||
String rodString = "";
|
||||
for ( ReferenceOrderedDatum datum : rodData )
|
||||
{
|
||||
if ( datum != null )
|
||||
{
|
||||
if ( datum instanceof rodDbSNP)
|
||||
{
|
||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||
rodString += dbsnp.toMediumString();
|
||||
}
|
||||
else
|
||||
{
|
||||
rodString += datum.toSimpleString();
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( rodString != "" )
|
||||
rodString = "[ROD: " + rodString + "]";
|
||||
|
||||
// Accumulate genotype likelihoods
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
bases += read.getReadString().charAt(offset);
|
||||
quals += read.getBaseQualityString().charAt(offset);
|
||||
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
|
||||
if ( context.getLocation().getStart() % 1 == 0 ) {
|
||||
//System.out.printf("%s: %s %s %s %s%n", context.getLocation(), ref, bases, quals, rodString);
|
||||
System.out.printf("%s %s %s %s\n", ref, bases, G.toString(), rodString);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraveralDone() {
|
||||
}
|
||||
}
|
||||
|
|
@ -108,4 +108,93 @@ public class Utils {
|
|||
|
||||
GenomeLoc.setContigOrdering(refContigOrdering);
|
||||
}
|
||||
|
||||
// Java Generics can't do primitive types, so I had to do this the simplistic way
|
||||
|
||||
public static Integer[] SortPermutation(final int[] A)
|
||||
{
|
||||
class comparator implements Comparator
|
||||
{
|
||||
public int compare(Object a, Object b)
|
||||
{
|
||||
if (A[(Integer)a] < A[(Integer)b]) { return -1; }
|
||||
if (A[(Integer)a] == A[(Integer)b]) { return 0; }
|
||||
if (A[(Integer)a] > A[(Integer)b]) { return 1; }
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
Integer[] permutation = new Integer[A.length];
|
||||
for (int i = 0; i < A.length; i++)
|
||||
{
|
||||
permutation[i] = i;
|
||||
}
|
||||
Arrays.sort(permutation, new comparator());
|
||||
return permutation;
|
||||
}
|
||||
|
||||
public static Integer[] SortPermutation(final double[] A)
|
||||
{
|
||||
class comparator implements Comparator
|
||||
{
|
||||
public int compare(Object a, Object b)
|
||||
{
|
||||
if (A[(Integer)a] < A[(Integer)b]) { return -1; }
|
||||
if (A[(Integer)a] == A[(Integer)b]) { return 0; }
|
||||
if (A[(Integer)a] > A[(Integer)b]) { return 1; }
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
Integer[] permutation = new Integer[A.length];
|
||||
for (int i = 0; i < A.length; i++)
|
||||
{
|
||||
permutation[i] = i;
|
||||
}
|
||||
Arrays.sort(permutation, new comparator());
|
||||
return permutation;
|
||||
}
|
||||
|
||||
public static int[] PermuteArray(int[] array, Integer[] permutation)
|
||||
{
|
||||
int[] output = new int[array.length];
|
||||
for (int i = 0; i < output.length; i++)
|
||||
{
|
||||
output[i] = array[permutation[i]];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
public static double[] PermuteArray(double[] array, Integer[] permutation)
|
||||
{
|
||||
double[] output = new double[array.length];
|
||||
for (int i = 0; i < output.length; i++)
|
||||
{
|
||||
output[i] = array[permutation[i]];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
public static Object[] PermuteArray(Object[] array, Integer[] permutation)
|
||||
{
|
||||
Object[] output = new Object[array.length];
|
||||
for (int i = 0; i < output.length; i++)
|
||||
{
|
||||
output[i] = array[permutation[i]];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
public static String[] PermuteArray(String[] array, Integer[] permutation)
|
||||
{
|
||||
String[] output = new String[array.length];
|
||||
for (int i = 0; i < output.length; i++)
|
||||
{
|
||||
output[i] = array[permutation[i]];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue