Migration of some core calculations (log-likelihood probabilities, etc.) from CoverageAndPowerWalker into static methods in PoolUtils

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1527 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-09-03 21:43:29 +00:00
parent 93cedf4285
commit 544900aa99
2 changed files with 66 additions and 67 deletions

View File

@ -163,7 +163,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
double power = 0.0;
for ( int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++) {
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsAndRefReads = coinTossPartition(reads,offsets,this.getSNPProportion(1));
if( calculateLogLikelihoodOfSample(snpReadsAndRefReads, num_individuals) > thresh) {
if( PoolUtils.calculateLogLikelihoodOfSample(snpReadsAndRefReads, num_individuals) > thresh) {
power += 1.0/BOOTSTRAP_ITERATIONS;
}
}
@ -209,41 +209,4 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
return partitionedReads;
}
/**
 * Scores a partition of reads into SNP-supporting and reference-supporting sets.
 *
 * The result is the summed log10 probability of the observations under the
 * "reference true" model minus the summed log10 probability under the
 * "SNP true" model, accumulated over both read sets.
 * NOTE(review): the sign convention (reference minus SNP) is taken directly
 * from the arithmetic below; confirm it matches what callers compare against.
 *
 * @param snpReadsRefReads first element: (SNP reads, reference reads);
 *                         second element: the matching (SNP offsets, reference offsets)
 * @param nIndivids        number of individuals; the per-read mixing denominator is 2 * nIndivids
 * @return the log10 score described above
 */
public static double calculateLogLikelihoodOfSample(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsRefReads, int nIndivids) {
    final Pair<List<SAMRecord>,List<SAMRecord>> readPartition = snpReadsRefReads.getFirst();
    final Pair<List<Integer>,List<Integer>> offsetPartition = snpReadsRefReads.getSecond();
    final double denom = 2.0*nIndivids;

    // Base qualities at the locus for each side of the partition.
    final List<Byte> snpQuals = getQList(readPartition.getFirst(), offsetPartition.getFirst());
    final List<Byte> refQuals = getQList(readPartition.getSecond(), offsetPartition.getSecond());

    // Each pair holds (sum of log10 P(obs | SNP true), sum of log10 P(obs | ref true)).
    final Pair<Double,Double> snpSums = qListToSumLogProbabilities(true, snpQuals, denom);
    final Pair<Double,Double> refSums = qListToSumLogProbabilities(false, refQuals, denom);

    // Keep the left-to-right evaluation order so the floating-point result is unchanged.
    return 0.0 - snpSums.first - refSums.first + snpSums.second + refSums.second;
}
/**
 * Collects, for each read, the base quality at that read's offset.
 *
 * The two lists are treated as parallel: element i of {@code offsets} is the
 * index into the base-quality array of element i of {@code reads}.
 *
 * @param reads   reads to pull qualities from
 * @param offsets per-read index into {@code getBaseQualities()}
 * @return a new list with one quality per read, in the same order as {@code reads}
 */
public static List<Byte> getQList(List<SAMRecord> reads, List<Integer> offsets) {
    // Parameterized instead of the raw LinkedList to avoid an unchecked-conversion warning.
    List<Byte> qscores = new LinkedList<Byte>();
    for (int readNo = 0; readNo < reads.size(); readNo++) {
        final byte[] baseQualities = reads.get(readNo).getBaseQualities();
        qscores.add(baseQualities[offsets.get(readNo)]);
    }
    return qscores;
}
/**
 * Sums per-base log10 observation probabilities under two competing models.
 *
 * For every quality in {@code qList} the error probability is obtained from
 * {@code QualityUtils.qualToErrorProb}, and two running sums are updated:
 * one assuming a SNP is truly present, one assuming the reference is true.
 * For SNP observations an "error" means a reference base was really there;
 * for reference observations an "error" means a SNP base was really there.
 *
 * @param listRepresentsSNPObservations true if the qualities belong to SNP-supporting bases,
 *                                      false if they belong to reference-supporting bases
 * @param qList                         base qualities of the observations
 * @param denom                         mixing denominator; the caller in this file passes
 *                                      2 * number of individuals
 * @return pair of (sum of log10 P(obs | SNP true), sum of log10 P(obs | ref true))
 */
public static Pair<Double,Double> qListToSumLogProbabilities(boolean listRepresentsSNPObservations, List<Byte> qList, double denom) {
    double sumLogGivenSnp = 0;
    double sumLogGivenRef = 0;

    for (byte quality : qList) {
        final double errorProb = QualityUtils.qualToErrorProb(quality);
        final double probGivenSnp;
        final double probGivenRef;

        if (listRepresentsSNPObservations) {
            // Observed the SNP allele.
            probGivenSnp = (1 - errorProb) / denom + ((denom - 1) * errorProb) / denom;
            probGivenRef = errorProb;
        } else {
            // Observed the reference allele.
            probGivenSnp = (denom - 1) * (1 - errorProb) / denom + errorProb / denom;
            probGivenRef = 1 - errorProb;
        }

        sumLogGivenSnp += Math.log10(probGivenSnp);
        sumLogGivenRef += Math.log10(probGivenRef);
    }

    return new Pair<Double,Double>(sumLogGivenSnp, sumLogGivenRef);
}
}

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.ArrayList;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
/**
* Created by IntelliJ IDEA.
@ -16,7 +17,8 @@ import org.broadinstitute.sting.utils.Pair;
*/
public class PoolUtils {
private PoolUtils () {}
private PoolUtils() {
}
public static final int BASE_A_OFFSET = 0;
public static final int BASE_C_OFFSET = 1;
@ -67,22 +69,27 @@ public class PoolUtils {
for (int readNum = 0; readNum < reads.size(); readNum++) {
switch (reads.get(readNum).getReadBases()[offsets.get(readNum)]) {
case 'A':
case 'a': readsByBase[0].add(reads.get(readNum));
offsetsByBase[0].add(offsets.get(readNum));
case 'a':
readsByBase[BASE_A_OFFSET].add(reads.get(readNum));
offsetsByBase[BASE_A_OFFSET].add(offsets.get(readNum));
break;
case 'C':
case 'c': readsByBase[1].add(reads.get(readNum));
offsetsByBase[1].add(offsets.get(readNum));
case 'c':
readsByBase[BASE_C_OFFSET].add(reads.get(readNum));
offsetsByBase[BASE_C_OFFSET].add(offsets.get(readNum));
break;
case 'G':
case 'g': readsByBase[2].add(reads.get(readNum));
offsetsByBase[2].add(offsets.get(readNum));
case 'g':
readsByBase[BASE_G_OFFSET].add(reads.get(readNum));
offsetsByBase[BASE_G_OFFSET].add(offsets.get(readNum));
break;
case 'T':
case 't': readsByBase[3].add(reads.get(readNum));
offsetsByBase[3].add(offsets.get(readNum));
case 't':
readsByBase[BASE_T_OFFSET].add(reads.get(readNum));
offsetsByBase[BASE_T_OFFSET].add(offsets.get(readNum));
break;
default: break; // TODO: INDEL AWARENESS
default:
break; // TODO: INDEL AWARENESS
}
}
}
@ -149,8 +156,7 @@ public class PoolUtils {
return BASE_T_OFFSET;
}
public static List<Byte> getListOfBaseQualities(List<SAMRecord> reads,List<Integer> offsets) {
//TODO: this is a terrible method name. Change it to something better.
public static List<Byte> getReadBaseQualities(List<SAMRecord> reads, List<Integer> offsets) {
List<Byte> qualities = new ArrayList<Byte>(reads.size());
for (int readNo = 0; readNo < reads.size(); readNo++) {
qualities.add(reads.get(readNo).getBaseQualities()[offsets.get(readNo)]);
@ -158,4 +164,34 @@ public class PoolUtils {
return qualities;
}
/**
 * Computes a log10 score for a partition of reads into SNP and reference sets.
 *
 * The score is the total log10 probability of all observations given that the
 * reference is true, minus the total log10 probability given that a SNP is true.
 * NOTE(review): this sign convention is read off the return expression; verify
 * it against the threshold comparison in the calling walker.
 *
 * @param snpReadsRefReads pair whose first element is (SNP reads, reference reads) and whose
 *                         second element is the corresponding (SNP offsets, reference offsets)
 * @param nIndivids        number of individuals; 2 * nIndivids is used as the mixing denominator
 * @return the log10 score described above
 */
public static double calculateLogLikelihoodOfSample(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsRefReads, int nIndivids) {
    final Pair<List<SAMRecord>,List<SAMRecord>> reads = snpReadsRefReads.getFirst();
    final Pair<List<Integer>,List<Integer>> offsets = snpReadsRefReads.getSecond();
    final double denom = 2.0*nIndivids;

    // Gather qualities for both sides before doing any probability math.
    final List<Byte> snpQualities = getReadBaseQualities(reads.getFirst(), offsets.getFirst());
    final List<Byte> refQualities = getReadBaseQualities(reads.getSecond(), offsets.getSecond());

    // first = sum log10 P(obs | SNP true), second = sum log10 P(obs | ref true)
    final Pair<Double,Double> fromSnpReads = qListToSumLogProbabilities(true, snpQualities, denom);
    final Pair<Double,Double> fromRefReads = qListToSumLogProbabilities(false, refQualities, denom);

    // Operand order is kept as-is so rounding behaviour does not change.
    return 0.0 - fromSnpReads.first - fromRefReads.first + fromSnpReads.second + fromRefReads.second;
}
/**
 * Accumulates log10 probabilities of a set of base observations under the
 * "SNP true" and "reference true" models.
 *
 * Each quality is converted to an error probability with
 * {@code QualityUtils.qualToErrorProb}. When the list holds SNP observations,
 * an error means the true base was the reference; when it holds reference
 * observations, an error means the true base was the SNP.
 *
 * @param listRepresentsSNPObservations whether {@code qList} belongs to SNP-supporting bases
 *                                      (true) or reference-supporting bases (false)
 * @param qList                         base qualities, one per observation
 * @param denom                         mixing denominator; the caller in this class passes
 *                                      2 * number of individuals
 * @return pair of (sum of log10 P(obs | SNP true), sum of log10 P(obs | ref true))
 */
public static Pair<Double,Double> qListToSumLogProbabilities(boolean listRepresentsSNPObservations, List<Byte> qList, double denom) {
    double logSumSnpTrue = 0;
    double logSumRefTrue = 0;

    for (byte q : qList) {
        final double pErr = QualityUtils.qualToErrorProb(q);

        // Probability of this observation if a SNP is present.
        final double pObsGivenSnp = listRepresentsSNPObservations
                ? (1 - pErr) / denom + ((denom - 1) * pErr) / denom
                : (denom - 1) * (1 - pErr) / denom + pErr / denom;

        // Probability of this observation if only the reference is present.
        final double pObsGivenRef = listRepresentsSNPObservations ? pErr : 1 - pErr;

        logSumSnpTrue += Math.log10(pObsGivenSnp);
        logSumRefTrue += Math.log10(pObsGivenRef);
    }

    return new Pair<Double,Double>(logSumSnpTrue, logSumRefTrue);
}
}