diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 3eb40685f..3d5f088f6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -12,18 +12,16 @@ import java.util.Arrays; public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities")); } - protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; - - if ( p.getBase() == ref ) { - refQuals.add((int)p.getQual()); - } else if ( (char)p.getBase() == alt ) { - altQuals.add((int)p.getQual()); + protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( final PileupElement p : pileup ) { + if( isUsableBase(p) ) { + if ( p.getBase() == ref ) { + refQuals.add((int)p.getQual()); + } else if ( p.getBase() == alt ) { + altQuals.add((int)p.getQual()); + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 7036d15c6..2d130e939 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -13,18 +13,16 @@ public class MappingQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("MQRankSum"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities")); } + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities")); } - protected void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; - - if ( p.getBase() == ref ) { - refQuals.add(p.getMappingQual()); - } else if ( (char)p.getBase() == alt ) { - altQuals.add(p.getMappingQual()); + protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( final PileupElement p : pileup ) { + if( isUsableBase(p) && p.getMappingQual() < 254 ) { // 254 and 255 are special mapping qualities used as a code by aligners + if ( p.getBase() == ref ) { + refQuals.add(p.getMappingQual()); + } else if ( p.getBase() == alt ) { + altQuals.add(p.getMappingQual()); + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 75d8e05b7..47461cd9f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.List; @@ -16,7 +18,6 @@ import java.util.HashMap; public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAnnotation { - private final static boolean DEBUG = false; private static final double minPValue = 1e-20; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { @@ -34,14 +35,26 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn final ArrayList altQuals = new ArrayList(); for ( final Map.Entry genotype : genotypes.entrySet() ) { - if ( !genotype.getValue().isHomRef() ) { - final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); - if ( context == null ) - continue; - - fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals); + final StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); + if ( context == null ) { + continue; } + fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).getBases()[0], context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(), refQuals, altQuals); } + + final MannWhitneyU mannWhitneyU = new MannWhitneyU(); + for ( final Integer qual : altQuals ) { + mannWhitneyU.add(qual, MannWhitneyU.USet.SET1); + } + for ( final Integer qual : refQuals ) { + mannWhitneyU.add(qual, MannWhitneyU.USet.SET2); + } + + // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) + final Pair testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 ); + double pvalue = testResults.second; + + /* // WilcoxonRankSum.java is slated for removal final WilcoxonRankSum wilcoxon = new WilcoxonRankSum(); for ( final Integer qual : altQuals ) { wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET1); @@ -50,21 +63,13 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn wilcoxon.addObservation((double)qual, WilcoxonRankSum.WILCOXON_SET.SET2); } - // for R debugging - //if ( DEBUG ) { - // wilcoxon.DEBUG = DEBUG; - // System.out.printf("%s%n", ref.getLocus()); - // System.out.printf("alt <- c(%s)%n", Utils.join(",", altQuals)); - // System.out.printf("ref <- c(%s)%n", Utils.join(",", refQuals)); - //} - // we are testing these set1 (the alt bases) have lower quality scores than set2 (the ref bases) double pvalue = wilcoxon.getPValue(WilcoxonRankSum.WILCOXON_H0.SET1_LT_SET2); - //System.out.println("p = " + pvalue); - //System.out.println(); + if ( MathUtils.compareDoubles(pvalue, -1.0) == 0 ) { pvalue = 1.0; } + */ // deal with precision issues if ( pvalue < minPValue ) { @@ -76,5 +81,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn return map; } - protected abstract void fillQualsFromPileup(byte ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals); + protected abstract void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals); + + protected static boolean isUsableBase( final PileupElement p ) { + return !( p.isDeletion() || p.getMappingQual() == 0 || ((int)p.getQual()) < 6 ); // need the unBAQed quality score here + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 193e3c883..cdf19fea4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.oneoffprojects.walkers.association.statistics.ca import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.WilcoxonRankSum; import org.broadinstitute.sting.utils.collections.Pair; /** diff --git a/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java b/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java index 5ba70e9a7..ecd470544 100755 --- a/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java +++ b/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java @@ -93,7 +93,7 @@ public class WilcoxonRankSum { public boolean DEBUG = false; private static final double NORMAL_MEAN = 0.0; - private static final double NORMAL_SD = 3.0; // Need to spread out the standard normal in order to provide better resolution + private static final double NORMAL_SD = 1.0; // Need to spread out the standard normal in order to provide better resolution private static final Normal NORMAL = new Normal(NORMAL_MEAN, NORMAL_SD, null); // The minimum length for both data series (individually) in order to use a normal distribution @@ -266,7 +266,7 @@ public class WilcoxonRankSum { private class PairComparator implements Comparator>{ public int compare(Pair p1, Pair p2) { - return (p1.first < p2.first ? -1 : (p1.first == p2.first ? 0 : 1)); + return (p1.first < p2.first ? -1 : (p1.first.equals(p2.first) ? 0 : 1)); } } } \ No newline at end of file