From 09e89c8c9739a7d0eef5c77e6969a5fe120d1ba5 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Sat, 2 Apr 2011 22:26:35 +0000 Subject: [PATCH] Adding ReadPos rank sum test. Transitioned rank sum tests over to using Chris's implementation in order to harmonize the codebase. There isn't any reason to have competing implementations of rank sum. Thanks to Chris for adding the necessary hypothesis testing options. WilcoxonRankSum.java will be deleted soon. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5559 348d0f76-0448-11de-a6fe-93d51630548a --- .../annotator/BaseQualityRankSumTest.java | 20 ++++----- .../annotator/MappingQualityRankSumTest.java | 20 ++++----- .../gatk/walkers/annotator/RankSumTest.java | 45 +++++++++++-------- .../association/AssociationTestRunner.java | 1 - .../sting/utils/WilcoxonRankSum.java | 4 +- 5 files changed, 47 insertions(+), 43 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 3eb40685f..3d5f088f6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -12,18 +12,16 @@ import java.util.Arrays; public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities")); } - protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; - - if ( p.getBase() == ref ) { - refQuals.add((int)p.getQual()); - } else if ( (char)p.getBase() == alt ) { - altQuals.add((int)p.getQual()); + protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( final PileupElement p : pileup ) { + if( isUsableBase(p) ) { + if ( p.getBase() == ref ) { + refQuals.add((int)p.getQual()); + } else if ( p.getBase() == alt ) { + altQuals.add((int)p.getQual()); + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 7036d15c6..2d130e939 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -13,18 +13,16 @@ public class MappingQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("MQRankSum"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities")); } - protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; - - if ( p.getBase() == ref ) { - refQuals.add(p.getMappingQual()); - } else if ( (char)p.getBase() == alt ) { - altQuals.add(p.getMappingQual()); + protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( final PileupElement p : pileup ) { + if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners + if ( p.getBase() == ref ) { + refQuals.add(p.getMappingQual()); + } else if ( p.getBase() == alt ) { + altQuals.add(p.getMappingQual()); + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 75d8e05b7..47461cd9f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.List; @@ -16,7 +18,6 @@ import java.util.HashMap; public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAnnotation { - private final static boolean DEBUG = false; private static final double minPValue = 1e-20; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { @@ -34,14 +35,26 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn final ArrayList altQuals = new ArrayList(); for ( final Map.Entry genotype : genotypes.entrySet() ) { - if ( !genotype.getValue().isHomRef() ) { - final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); - if ( context == null ) - continue; - - fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals); + final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); + if ( context == null ) { + continue; } + fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).getBases()[0], context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals); } + + final MannWhitneyU mannWhitneyU = new MannWhitneyU(); + for ( final Integer qual : altQuals ) { + mannWhitneyU.add(qual, MannWhitneyU.USet.SET1); + } + for ( final Integer qual : refQuals ) { + mannWhitneyU.add(qual, MannWhitneyU.USet.SET2); + } + + // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) + final Pair testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 ); + double pvalue = testResults.second; + + /* // WilcoxonRankSum.java is slated for removal final WilcoxonRankSum wilcoxon = new WilcoxonRankSum(); for ( final Integer qual : altQuals ) { wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET1); @@ -50,21 +63,13 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET2); } - // for R debugging - //if ( DEBUG ) { - // wilcoxon.DEBUG = DEBUG; - // System.out.printf("%s%n", ref.getLocus()); - // System.out.printf("alt <- c(%s)%n", Utils.join(",", altQuals)); - // System.out.printf("ref <- c(%s)%n", Utils.join(",", refQuals)); - //} - // we are testing these set1 (the alt bases) have lower quality scores than set2 (the ref bases) double pvalue = wilcoxon.getPValue(WilcoxonRankSum.WILCOXON_H0.SET1_LT_SET2); - //System.out.println("p = " + pvalue); - //System.out.println(); + if ( MathUtils.compareDoubles(pvalue, -1.0) == 0 ) { pvalue = 1.0; } + */ // deal with precision issues if ( pvalue < minPValue ) { @@ -76,5 +81,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn return map; } - protected abstract void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals); + protected abstract void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals); + + protected static boolean isUsableBase( final PileupElement p ) { + return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 193e3c883..cdf19fea4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.ca import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.WilcoxonRankSum; import org.broadinstitute.sting.utils.collections.Pair; /** diff --git a/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java b/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java index 5ba70e9a7..ecd470544 100755 --- a/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java +++ b/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java @@ -93,7 +93,7 @@ public class WilcoxonRankSum { public boolean DEBUG = false; private static final double NORMAL_MEAN = 0.0; - private static final double NORMAL_SD = 3.0; // Need to spread out the standard normal in order to provide better resolution + private static final double NORMAL_SD = 1.0; // Need to spread out the standard normal in order to provide better resolution private static final Normal NORMAL = new Normal(NORMAL_MEAN, NORMAL_SD, null); // The minimum length for both data series (individually) in order to use a normal distribution @@ -266,7 +266,7 @@ public class WilcoxonRankSum { private class PairComparator implements Comparator>{ public int compare(Pair p1, Pair p2) { - return (p1.first < p2.first ? -1 : (p1.first == p2.first ? 0 : 1)); + return (p1.first < p2.first ? -1 : (p1.first.equals(p2.first) ? 0 : 1)); } } } \ No newline at end of file