1. Fixed small but embarrassing bug in weighted Allele Balance annotation calculation.
2. Made RankSumTest abstract; added 2 subclasses: BaseQualityRST and MappingQualityRST (the latter based on a suggestion from Mark Daly). Untested so they're still experimental. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2561 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
58999a8e9d
commit
03b7d5f5c7
|
|
@ -21,8 +21,8 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int refCount = 0;
|
||||
int altCount = 0;
|
||||
double ratio = 0.0;
|
||||
double totalWeights = 0.0;
|
||||
for ( Genotype genotype : genotypes ) {
|
||||
// we care only about het calls
|
||||
if ( !genotype.isHet() )
|
||||
|
|
@ -49,17 +49,23 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
int aCount = Utils.countOccurrences(a, bases);
|
||||
int bCount = Utils.countOccurrences(b, bases);
|
||||
|
||||
int refCount = (a == ref.getBase() ? aCount : bCount);
|
||||
int altCount = (a == ref.getBase() ? bCount : aCount);
|
||||
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
continue;
|
||||
|
||||
// weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much
|
||||
refCount += genotype.getNegLog10PError() * (a == ref.getBase() ? aCount : bCount);
|
||||
altCount += genotype.getNegLog10PError() * (a == ref.getBase() ? bCount : aCount);
|
||||
ratio += genotype.getNegLog10PError() * ((double)refCount / (double)(refCount + altCount));
|
||||
totalWeights += genotype.getNegLog10PError();
|
||||
}
|
||||
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
// make sure we had a het genotype
|
||||
if ( MathUtils.compareDoubles(totalWeights, 0.0) == 0 )
|
||||
return null;
|
||||
|
||||
double ratio = (double)refCount / (double)(refCount + altCount);
|
||||
return String.format("%.2f", ratio);
|
||||
return String.format("%.2f", (ratio / totalWeights));
|
||||
}
|
||||
|
||||
public String getKeyName() { return "AB"; }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class BaseQualityRankSumTest extends RankSumTest {
|
||||
|
||||
public String getKeyName() { return "BaseQRankSum"; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("BaseQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
|
||||
|
||||
protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() )
|
||||
continue;
|
||||
|
||||
char base = (char)p.getBase();
|
||||
if ( base == ref )
|
||||
refQuals.add((int)p.getQual());
|
||||
else if ( base == alt )
|
||||
altQuals.add((int)p.getQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MappingQualityRankSumTest extends RankSumTest {
|
||||
|
||||
public String getKeyName() { return "MQRankSum"; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities"); }
|
||||
|
||||
protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() )
|
||||
continue;
|
||||
|
||||
char base = (char)p.getBase();
|
||||
if ( base == ref )
|
||||
refQuals.add(p.getMappingQual());
|
||||
else if ( base == alt )
|
||||
altQuals.add(p.getMappingQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5,16 +5,14 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class RankSumTest implements VariantAnnotation {
|
||||
public abstract class RankSumTest implements VariantAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private static final double minPValue = 1e-10;
|
||||
|
||||
|
|
@ -68,21 +66,5 @@ public class RankSumTest implements VariantAnnotation {
|
|||
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
|
||||
}
|
||||
|
||||
public String getKeyName() { return "RankSum"; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("RankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); }
|
||||
|
||||
private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() )
|
||||
continue;
|
||||
|
||||
char base = (char)p.getBase();
|
||||
if ( base == ref )
|
||||
refQuals.add((int)p.getQual());
|
||||
else if ( base == alt )
|
||||
altQuals.add((int)p.getQual());
|
||||
}
|
||||
}
|
||||
protected abstract void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals);
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e3282acfec015441dfc38715d5bfe530"));
|
||||
Arrays.asList("514d6357ae4970a422d0481f57f52e0c"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("17119c073a964620809eaaf3a39dfbc4"));
|
||||
Arrays.asList("903c0a9642a34ed19dadee72ddcf4189"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("20943a3666ceef605d9154b6f2ff2fa1"));
|
||||
Arrays.asList("90861b8af4514f8e64e4d93fe0fcd304"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("e865132880c6aa07bfdd725408fc21e8"));
|
||||
Arrays.asList("0e9c65b14ef694bb3f246e58515dbdce"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("0078a4e6611cbfc7225f52209b87c4aa"));
|
||||
Arrays.asList("eee834f4242d72fffb09cd248bd8ed86"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("9c862cf36124710648d6d8b83882c603"));
|
||||
Arrays.asList("ee5c07b4e57966bf173dea100f1e89e0"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("4f6aeda8903be49344353b79ee7763b4"));
|
||||
Arrays.asList("15d2e6040933704547a39a531869f36d"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testParallelization() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
|
||||
Arrays.asList("dbaff01edcfc8d83efa937f5887b26c2"));
|
||||
Arrays.asList("463c5c9cf9a10f8b737184c10c199783"));
|
||||
executeTest("test parallelization", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "c6abe711dade515e72b671d7e0d6e724" );
|
||||
e.put( "-all_bases", "d9705e4c89f333f3fa6e771716db57e4" );
|
||||
e.put( "-genotype", "b69545bb707edbb033307aab0aecd85a" );
|
||||
e.put( "-all_bases", "7a2a691e8eda96e28c0868c87bba9402" );
|
||||
e.put( "--min_base_quality_score 26", "015509aca82954b89d7dc83134f2103e" );
|
||||
e.put( "--min_mapping_quality_score 26", "a3926d2d24f9b4b4d772243a50ebdd59" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "ab592c8d0996d9bc4bb6685eaf874883" );
|
||||
e.put( "--min_mapping_quality_score 26", "227bff531007ea610577be5b2696c8e4" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "7b294de04436a2362c5536fa8af83da4" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
|
||||
Arrays.asList("15af52b80e2be8e8d2d489c3f86bdadc"));
|
||||
Arrays.asList("c2e4ebd66901d6160dc38cdaa29b89f3"));
|
||||
executeTest("testConfidence", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue