Fisher exact makes a return. Seems to be working properly. Current tagged as a work in progress. Needs to take the filtered context to be truly correct.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4562 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-10-22 20:35:53 +00:00
parent 78e71c4167
commit 9fe71dc452
1 changed files with 0 additions and 191 deletions

View File

@ -1,191 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import net.sf.samtools.SAMRecord;
import cern.jet.math.Arithmetic;
import java.util.List;
public class FisherStrand implements VariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
// this test doesn't make sense for homs
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
if ( genotype == null )
return null;
final String genotypeStr = genotype.getBases().toUpperCase();
if ( genotypeStr.length() != 2 )
return null;
int allele1 = BaseUtils.simpleBaseToBaseIndex(genotypeStr.charAt(0));
int allele2 = BaseUtils.simpleBaseToBaseIndex(genotypeStr.charAt(1));
Double pvalue = strandTest(pileup, allele1, allele2);
if ( pvalue == null )
return null;
// use Math.abs to prevent -0's
return String.format("%.1f", Math.abs(QualityUtils.phredScaleErrorRate(pvalue)));
}
public String getKeyName() { return "FisherStrand"; }
public String getDescription() { return "FisherStrand,1,Float,\"Phred-scaled p-value Using Fisher's Exact Test to Detect Strand Bias\""; }
public boolean useZeroQualityReads() { return false; }
private Double strandTest(ReadBackedPileup pileup, int allele1, int allele2) {
int[][] table = getContingencyTable(pileup, allele1, allele2);
if ( !variantIsHet(table) )
return null;
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
double pValue = pCutoff;
while (rotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
table = getContingencyTable(pileup, allele1, allele2);
while (unrotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
//System.out.printf("P-cutoff: %f\n", pCutoff);
//System.out.printf("P-value: %f\n\n", pValue);
return pValue;
}
private static boolean variantIsHet(int[][] table) {
return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0));
}
private static void printTable(int[][] table, double pValue) {
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
}
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;
table[0][1] += 1;
table[1][1] -= 1;
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
}
private static boolean unrotateTable(int[][] table) {
table[0][0] += 1;
table[1][0] -= 1;
table[0][1] -= 1;
table[1][1] += 1;
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
}
private double computePValue(int[][] table) {
int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
int N = rowSums[0] + rowSums[1];
// calculate in log space so we don't die with high numbers
double pCutoff = Arithmetic.logFactorial(rowSums[0])
+ Arithmetic.logFactorial(rowSums[1])
+ Arithmetic.logFactorial(colSums[0])
+ Arithmetic.logFactorial(colSums[1])
- Arithmetic.logFactorial(table[0][0])
- Arithmetic.logFactorial(table[0][1])
- Arithmetic.logFactorial(table[1][0])
- Arithmetic.logFactorial(table[1][1])
- Arithmetic.logFactorial(N);
return Math.exp(pCutoff);
}
private static int sumRow(int[][] table, int column) {
int sum = 0;
for (int r = 0; r < table.length; r++) {
sum += table[r][column];
}
return sum;
}
private static int sumColumn(int[][] table, int row) {
int sum = 0;
for (int c = 0; c < table[row].length; c++) {
sum += table[row][c];
}
return sum;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @param pileup the pileup for the locus
* @param allele1 information for the called variant
* @param allele2 information for the called variant
* @return a 2x2 contingency table
*/
private static int[][] getContingencyTable(ReadBackedPileup pileup, int allele1, int allele2) {
int[][] table = new int[2][2];
List<SAMRecord> reads = pileup.getReads();
List<Integer> offsets = pileup.getOffsets();
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
SAMRecord read = reads.get(readIndex);
int offset = offsets.get(readIndex);
// skip over deletion sites
if ( offset == -1 )
continue;
int readAllele = BaseUtils.simpleBaseToBaseIndex((char)read.getReadBases()[offset]);
boolean isFW = !read.getReadNegativeStrandFlag();
if (readAllele == allele1 || readAllele == allele2) {
int row = (readAllele == allele1) ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column]++;
}
}
return table;
}
}