1. Fixed small but embarrassing bug in weighted Allele Balance annotation calculation.

2. Made RankSumTest abstract; added 2 subclasses: BaseQualityRST and MappingQualityRST (the latter based on a suggestion from Mark Daly).  Untested so they're still experimental.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2561 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-12 18:33:53 +00:00
parent 58999a8e9d
commit 03b7d5f5c7
6 changed files with 87 additions and 41 deletions

View File

@ -21,8 +21,8 @@ public class AlleleBalance extends StandardVariantAnnotation {
if ( genotypes == null || genotypes.size() == 0 )
return null;
int refCount = 0;
int altCount = 0;
double ratio = 0.0;
double totalWeights = 0.0;
for ( Genotype genotype : genotypes ) {
// we care only about het calls
if ( !genotype.isHet() )
@ -49,17 +49,23 @@ public class AlleleBalance extends StandardVariantAnnotation {
int aCount = Utils.countOccurrences(a, bases);
int bCount = Utils.countOccurrences(b, bases);
int refCount = (a == ref.getBase() ? aCount : bCount);
int altCount = (a == ref.getBase() ? bCount : aCount);
// sanity check
if ( refCount + altCount == 0 )
continue;
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
refCount += genotype.getNegLog10PError() * (a == ref.getBase() ? aCount : bCount);
altCount += genotype.getNegLog10PError() * (a == ref.getBase() ? bCount : aCount);
ratio += genotype.getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
totalWeights += genotype.getNegLog10PError();
}
// sanity check
if ( refCount + altCount == 0 )
// make sure we had a het genotype
if ( MathUtils.compareDoubles(totalWeights, 0.0) == 0 )
return null;
double ratio = (double)refCount / (double)(refCount + altCount);
return String.format("%.2f", ratio);
return String.format("%.2f", (ratio / totalWeights));
}
public String getKeyName() { return "AB"; }

View File

@ -0,0 +1,29 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List;
public class BaseQualityRankSumTest extends RankSumTest {
public String getKeyName() { return "BaseQRankSum"; }
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("BaseQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
char base = (char)p.getBase();
if ( base == ref )
refQuals.add((int)p.getQual());
else if ( base == alt )
altQuals.add((int)p.getQual());
}
}
}

View File

@ -0,0 +1,29 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List;
public class MappingQualityRankSumTest extends RankSumTest {
public String getKeyName() { return "MQRankSum"; }
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities"); }
protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
char base = (char)p.getBase();
if ( base == ref )
refQuals.add(p.getMappingQual());
else if ( base == alt )
altQuals.add(p.getMappingQual());
}
}
}

View File

@ -5,16 +5,14 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
public class RankSumTest implements VariantAnnotation {
public abstract class RankSumTest implements VariantAnnotation {
private final static boolean DEBUG = false;
private static final double minPValue = 1e-10;
@ -68,21 +66,5 @@ public class RankSumTest implements VariantAnnotation {
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
}
public String getKeyName() { return "RankSum"; }
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("RankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
char base = (char)p.getBase();
if ( base == ref )
refQuals.add((int)p.getQual());
else if ( base == alt )
altQuals.add((int)p.getQual());
}
}
protected abstract void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals);
}

View File

@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e3282acfec015441dfc38715d5bfe530"));
Arrays.asList("514d6357ae4970a422d0481f57f52e0c"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("17119c073a964620809eaaf3a39dfbc4"));
Arrays.asList("903c0a9642a34ed19dadee72ddcf4189"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("20943a3666ceef605d9154b6f2ff2fa1"));
Arrays.asList("90861b8af4514f8e64e4d93fe0fcd304"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("e865132880c6aa07bfdd725408fc21e8"));
Arrays.asList("0e9c65b14ef694bb3f246e58515dbdce"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

View File

@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("0078a4e6611cbfc7225f52209b87c4aa"));
Arrays.asList("eee834f4242d72fffb09cd248bd8ed86"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("9c862cf36124710648d6d8b83882c603"));
Arrays.asList("ee5c07b4e57966bf173dea100f1e89e0"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("4f6aeda8903be49344353b79ee7763b4"));
Arrays.asList("15d2e6040933704547a39a531869f36d"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testParallelization() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
Arrays.asList("dbaff01edcfc8d83efa937f5887b26c2"));
Arrays.asList("463c5c9cf9a10f8b737184c10c199783"));
executeTest("test parallelization", spec);
}
@ -98,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "c6abe711dade515e72b671d7e0d6e724" );
e.put( "-all_bases", "d9705e4c89f333f3fa6e771716db57e4" );
e.put( "-genotype", "b69545bb707edbb033307aab0aecd85a" );
e.put( "-all_bases", "7a2a691e8eda96e28c0868c87bba9402" );
e.put( "--min_base_quality_score 26", "015509aca82954b89d7dc83134f2103e" );
e.put( "--min_mapping_quality_score 26", "a3926d2d24f9b4b4d772243a50ebdd59" );
e.put( "--max_mismatches_in_40bp_window 5", "ab592c8d0996d9bc4bb6685eaf874883" );
e.put( "--min_mapping_quality_score 26", "227bff531007ea610577be5b2696c8e4" );
e.put( "--max_mismatches_in_40bp_window 5", "7b294de04436a2362c5536fa8af83da4" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
Arrays.asList("15af52b80e2be8e8d2d489c3f86bdadc"));
Arrays.asList("c2e4ebd66901d6160dc38cdaa29b89f3"));
executeTest("testConfidence", spec);
}