minQual and minPower filters added. VCF output added.

Calls are now made based on the likelihood AC model. Two filters are applied: minQual and minPower. Output is now a VCF file with the variant context. It's now called the gatk's PoolCaller, no longer Replication Validation framework. Lots of testing ensue....
This commit is contained in:
Mauricio Carneiro 2011-07-28 18:58:36 -04:00
parent 07ec5dd63a
commit a58ddab93b
2 changed files with 77 additions and 8 deletions

View File

@ -419,6 +419,32 @@ public class BaseUtils {
return new String(simpleComplement(bases.getBytes()));
}
/**
* Returns the index of the most common base in the basecounts array. To be used with
* pileup.getBaseCounts.
*
* @param baseCounts counts of a,c,g,t in order.
* @return the index of the most common base
*/
static public int mostFrequentBaseIndex(int[] baseCounts) {
int mostFrequentBaseIndex = 0;
for (int baseIndex = 1; baseIndex < 4; baseIndex++) {
if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) {
mostFrequentBaseIndex = baseIndex;
}
}
return mostFrequentBaseIndex;
}
/**
* Returns the most common base in the basecounts array. To be used with pileup.getBaseCounts.
*
* @param baseCounts counts of a,c,g,t in order.
* @return the most common base
*/
static public byte mostFrequentSimpleBase(int[] baseCounts) {
return baseIndexToSimpleBase(mostFrequentBaseIndex(baseCounts));
}
/**
* For the most frequent base in the sequence, return the percentage of the read it constitutes.
@ -437,12 +463,7 @@ public class BaseUtils {
}
}
int mostFrequentBaseIndex = 0;
for (int baseIndex = 1; baseIndex < 4; baseIndex++) {
if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) {
mostFrequentBaseIndex = baseIndex;
}
}
int mostFrequentBaseIndex = mostFrequentBaseIndex(baseCounts);
return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length);
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.apache.lucene.messages.NLS;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -124,6 +125,16 @@ public class MathUtils {
}
/**
* Calculates the log10 cumulative sum of an array with log10 probabilities
* @param log10p the array with log10 probabilites
* @param upTo index in the array to calculate the cumsum up to
* @return the log10 of the cumulative sum
*/
public static double log10CumulativeSumLog10(double [] log10p, int upTo) {
return log10sumLog10(log10p, 0, upTo);
}
/**
* Converts a real space array of probabilities into a log10 array
* @param prRealSpace
@ -137,10 +148,14 @@ public class MathUtils {
}
public static double log10sumLog10(double[] log10p, int start) {
return log10sumLog10(log10p, start, log10p.length);
}
public static double log10sumLog10(double[] log10p, int start, int finish) {
double sum = 0.0;
double maxValue = Utils.findMaxEntry(log10p);
for ( int i = start; i < log10p.length; i++ ) {
for ( int i = start; i < finish; i++ ) {
sum += Math.pow(10.0, log10p[i] - maxValue);
}
@ -471,6 +486,18 @@ public class MathUtils {
return maxI;
}
public static int maxElementIndex(int[] array) {
if ( array == null ) throw new IllegalArgumentException("Array cannot be null!");
int maxI = -1;
for ( int i = 0; i < array.length; i++ ) {
if ( maxI == -1 || array[i] > array[maxI] )
maxI = i;
}
return maxI;
}
public static double arrayMax(double[] array) {
return array[maxElementIndex(array)];
}
@ -727,7 +754,7 @@ public class MathUtils {
return count;
}
public static <T> int countOccurrences(byte element, byte [] array) {
public static int countOccurrences(byte element, byte [] array) {
int count = 0;
for (byte y : array) {
if (element == y)
@ -737,6 +764,27 @@ public class MathUtils {
return count;
}
/**
* Returns the top (larger) N elements of the array. Naive n^2 implementation (Selection Sort).
* Better than sorting if N (number of elements to return) is small
*
* @param array the array
* @param n number of top elements to return
* @return the n larger elements of the array
*/
public static Collection<Double> getNMaxElements(double [] array, int n) {
ArrayList<Double> maxN = new ArrayList<Double>(n);
double lastMax = Double.MAX_VALUE;
for (int i=0; i<n; i++) {
double max = Double.MIN_VALUE;
for (double x : array) {
max = Math.min(lastMax, Math.max(x, max));
}
maxN.add(max);
lastMax = max;
}
return maxN;
}
/**
* Returns n random indices drawn with replacement from the range 0..(k-1)