Fisher exact makes a return. Seems to be working properly. Current tagged as a work in progress. Needs to take the filtered context to be truly correct.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4561 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-10-22 20:35:44 +00:00
parent f06f955e06
commit 78e71c4167
1 changed files with 230 additions and 0 deletions

View File

@ -0,0 +1,230 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFHeaderLineType;
import cern.jet.math.Arithmetic;
import java.util.List;
import java.util.Map;
import java.util.Arrays;
import java.util.HashMap;
public class FisherStrand implements InfoFieldAnnotation, WorkInProgressAnnotation {
private static final String REFFWD = "REFFWD";
private static final String REFREV = "REFREV";
private static final String ALTFWD = "ALTFWD";
private static final String ALTREV = "ALTREV";
private static final String FS = "FS";
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.isVariant() || vc.isFiltered() || ! vc.isBiallelic() || ! vc.isSNP() )
return null;
int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAllele(0));
Double pvalue = pValueForContingencyTable(table);
if ( pvalue == null )
return null;
// use Math.abs to prevent -0's
Map<String, Object> map = new HashMap<String, Object>();
int phredPValue = (int)Math.round(QualityUtils.phredScaleErrorRate(pvalue));
addInt(map, REFFWD, table[0][0]);
addInt(map, REFREV, table[0][1]);
addInt(map, ALTFWD, table[1][0]);
addInt(map, ALTREV, table[1][1]);
addInt(map, FS, phredPValue);
return map;
}
private static void addInt(Map<String, Object> map, String key, int value) {
map.put(key, String.format("%d", value));
}
public List<String> getKeyNames() {
return Arrays.asList(REFFWD,REFREV,ALTFWD,ALTREV,FS);
}
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(REFFWD, 1, VCFHeaderLineType.Integer, "Count of bases with REF allele, forward strand"),
new VCFInfoHeaderLine(REFREV, 1, VCFHeaderLineType.Integer, "Count of bases with REF allele, reverse strand"),
new VCFInfoHeaderLine(ALTFWD, 1, VCFHeaderLineType.Integer, "Count of bases with ALT allele, forward strand"),
new VCFInfoHeaderLine(ALTREV, 1, VCFHeaderLineType.Integer, "Count of bases with ALT allele, reverse strand"),
new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Integer, "Integer-rounded Phred-scaled p-value using Fisher's exact test to detect strand bias"));
}
private Double pValueForContingencyTable(int[][] originalTable) {
int [][] table = copyContingencyTable(originalTable);
double pCutoff = computePValue(table);
//printTable(table, pCutoff);
double pValue = pCutoff;
while (rotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
table = copyContingencyTable(originalTable);
while (unrotateTable(table)) {
double pValuePiece = computePValue(table);
//printTable(table, pValuePiece);
if (pValuePiece <= pCutoff) {
pValue += pValuePiece;
}
}
//System.out.printf("P-cutoff: %f\n", pCutoff);
//System.out.printf("P-value: %f\n\n", pValue);
return pValue;
}
private static int [][] copyContingencyTable(int [][] t) {
int[][] c = new int[2][2];
for ( int i = 0; i < 2; i++ )
for ( int j = 0; j < 2; j++ )
c[i][j] = t[i][j];
return c;
}
private static void printTable(int[][] table, double pValue) {
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
}
private static boolean rotateTable(int[][] table) {
table[0][0] -= 1;
table[1][0] += 1;
table[0][1] += 1;
table[1][1] -= 1;
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
}
private static boolean unrotateTable(int[][] table) {
table[0][0] += 1;
table[1][0] -= 1;
table[0][1] -= 1;
table[1][1] += 1;
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
}
private static double computePValue(int[][] table) {
int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
int N = rowSums[0] + rowSums[1];
// calculate in log space so we don't die with high numbers
double pCutoff = Arithmetic.logFactorial(rowSums[0])
+ Arithmetic.logFactorial(rowSums[1])
+ Arithmetic.logFactorial(colSums[0])
+ Arithmetic.logFactorial(colSums[1])
- Arithmetic.logFactorial(table[0][0])
- Arithmetic.logFactorial(table[0][1])
- Arithmetic.logFactorial(table[1][0])
- Arithmetic.logFactorial(table[1][1])
- Arithmetic.logFactorial(N);
return Math.exp(pCutoff);
}
private static int sumRow(int[][] table, int column) {
int sum = 0;
for (int r = 0; r < table.length; r++) {
sum += table[r][column];
}
return sum;
}
private static int sumColumn(int[][] table, int row) {
int sum = 0;
for (int c = 0; c < table[row].length; c++) {
sum += table[row][c];
}
return sum;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
private static int[][] getContingencyTable(Map<String, StratifiedAlignmentContext> stratifiedContexts, Allele ref, Allele alt) {
int[][] table = new int[2][2];
for ( Map.Entry<String, StratifiedAlignmentContext> sample : stratifiedContexts.entrySet() ) {
for (PileupElement p : sample.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()) {
if ( p.isDeletion() ) // ignore deletions
continue;
if ( p.getRead().getMappingQuality() < 20 || p.getQual() < 20 )
continue; // todo -- fixme, should take filtered context!
Allele base = Allele.create(p.getBase(), false);
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
boolean matchesRef = ref.equals(base, true);
boolean matchesAlt = alt.equals(base, true);
if ( matchesRef || matchesAlt ) {
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column]++;
}
}
}
return table;
}
}