Adding ReadPos rank sum test. Transitioned rank sum tests over to using Chris's implementation in order to harmonize the codebase. There isn't any reason to have competing implementations of rank sum. Thanks to Chris for adding the necessary hypothesis testing options. WilcoxonRankSum.java will be deleted soon.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5559 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-04-02 22:26:35 +00:00
parent 11822da578
commit 09e89c8c97
5 changed files with 47 additions and 43 deletions

View File

@ -12,18 +12,16 @@ import java.util.Arrays;
public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities")); }
protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
if ( p.getBase() == ref ) {
refQuals.add((int)p.getQual());
} else if ( (char)p.getBase() == alt ) {
altQuals.add((int)p.getQual());
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( final PileupElement p : pileup ) {
if( isUsableBase(p) ) {
if ( p.getBase() == ref ) {
refQuals.add((int)p.getQual());
} else if ( p.getBase() == alt ) {
altQuals.add((int)p.getQual());
}
}
}
}

View File

@ -13,18 +13,16 @@ public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities")); }
protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
if ( p.getBase() == ref ) {
refQuals.add(p.getMappingQual());
} else if ( (char)p.getBase() == alt ) {
altQuals.add(p.getMappingQual());
protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( final PileupElement p : pileup ) {
if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners
if ( p.getBase() == ref ) {
refQuals.add(p.getMappingQual());
} else if ( p.getBase() == alt ) {
altQuals.add(p.getMappingQual());
}
}
}
}

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.List;
@ -16,7 +18,6 @@ import java.util.HashMap;
public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAnnotation {
private final static boolean DEBUG = false;
private static final double minPValue = 1e-20;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
@ -34,14 +35,26 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn
final ArrayList<Integer> altQuals = new ArrayList<Integer>();
for ( final Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
if ( !genotype.getValue().isHomRef() ) {
final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
if ( context == null )
continue;
fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals);
final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
if ( context == null ) {
continue;
}
fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).getBases()[0], context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals);
}
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
for ( final Integer qual : altQuals ) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
}
for ( final Integer qual : refQuals ) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
}
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
final Pair<Double,Double> testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 );
double pvalue = testResults.second;
/* // WilcoxonRankSum.java is slated for removal
final WilcoxonRankSum wilcoxon = new WilcoxonRankSum();
for ( final Integer qual : altQuals ) {
wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET1);
@ -50,21 +63,13 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn
wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET2);
}
// for R debugging
//if ( DEBUG ) {
// wilcoxon.DEBUG = DEBUG;
// System.out.printf("%s%n", ref.getLocus());
// System.out.printf("alt <- c(%s)%n", Utils.join(",", altQuals));
// System.out.printf("ref <- c(%s)%n", Utils.join(",", refQuals));
//}
// we are testing these set1 (the alt bases) have lower quality scores than set2 (the ref bases)
double pvalue = wilcoxon.getPValue(WilcoxonRankSum.WILCOXON_H0.SET1_LT_SET2);
//System.out.println("p = " + pvalue);
//System.out.println();
if ( MathUtils.compareDoubles(pvalue, -1.0) == 0 ) {
pvalue = 1.0;
}
*/
// deal with precision issues
if ( pvalue < minPValue ) {
@ -76,5 +81,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn
return map;
}
protected abstract void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals);
protected abstract void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals);
protected static boolean isUsableBase( final PileupElement p ) {
return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here
}
}

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.ca
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.WilcoxonRankSum;
import org.broadinstitute.sting.utils.collections.Pair;
/**

View File

@ -93,7 +93,7 @@ public class WilcoxonRankSum {
public boolean DEBUG = false;
private static final double NORMAL_MEAN = 0.0;
private static final double NORMAL_SD = 3.0; // Need to spread out the standard normal in order to provide better resolution
private static final double NORMAL_SD = 1.0; // Need to spread out the standard normal in order to provide better resolution
private static final Normal NORMAL = new Normal(NORMAL_MEAN, NORMAL_SD, null);
// The minimum length for both data series (individually) in order to use a normal distribution
@ -266,7 +266,7 @@ public class WilcoxonRankSum {
private class PairComparator implements Comparator<Pair<Double, WILCOXON_SET>>{
public int compare(Pair<Double, WILCOXON_SET> p1, Pair<Double, WILCOXON_SET> p2) {
return (p1.first < p2.first ? -1 : (p1.first == p2.first ? 0 : 1));
return (p1.first < p2.first ? -1 : (p1.first.equals(p2.first) ? 0 : 1));
}
}
}