New annotation: ResidualQuality

Computes a metric for how much error is left that isn't explained by ref or snp bases. This is the sum of Q scores, weighted by the proportion of non-ref non-snp bases to non-snp bases. Reported in Log space.

Update to the integration test so bamboo doesn't look as though someone murdered it with a spork




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2124 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-11-23 20:04:01 +00:00
parent 70059a0fc9
commit 23983b2fd8
2 changed files with 92 additions and 4 deletions

View File

@ -0,0 +1,88 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Nov 23, 2009
* Time: 1:39:39 PM
* To change this template use File | Settings | File Templates.
*/
public class ResidualQuality implements VariantAnnotation{
private static double EPSILON = Math.pow(10,-12);
public static String KEY_NAME = "ResidualQuality";
public boolean useZeroQualityReads() { return true; } // for robustness
public Pair<String,String> annotate( ReferenceContext ref, ReadBackedPileup p, List<Genotype> genotypes) {
Character snp = getSNPChar(ref, genotypes);
if ( snp == null ) {
return null;
}
Double logResidQual = getLogResidualQuality(p,ref.getBase(),snp);
if ( logResidQual == null ) {
return null;
}
return new Pair<String,String>(KEY_NAME, String.format("%f", logResidQual ));
}
private Double getLogResidualQuality( ReadBackedPileup p, char ref, char snp ) {
byte[] pbp = p.getBases();
byte[] quals = p.getQuals();
if ( pbp == null || quals == null ) {
return null;
}
int nSNP = 0;
int nRef = 0;
int pileupSize = pbp.length;
int sumOfQuals = 0;
for ( int i = 0; i < pileupSize; i ++ ) {
if ( BaseUtils.basesAreEqual( pbp[i], (byte) ref ) ) {
// ref site
nRef ++;
} else if ( BaseUtils.basesAreEqual ( pbp[i], ( byte ) snp ) ) {
// snp site
nSNP++;
} else {
// non-ref non-snp site, increase quality
sumOfQuals += quals[i];
}
}
// want to return sum of qualities times the proportion of non-SNP bases they account for (includes ref bases)
// taking the log gives log(SQ) + log(non-ref non-snp bases) - log(non-snp bases)
return Math.log(sumOfQuals + EPSILON) + Math.log(pileupSize-nSNP-nRef+EPSILON) - Math.log(pileupSize-nSNP + EPSILON);
}
private Character getSNPChar( ReferenceContext ref, List<Genotype> genotypes ) {
try {
return getNonref( genotypes, ref.getBase() );
} catch ( IllegalStateException e ) {
return null;
}
}
private char getNonref(List<Genotype> genotypes, char ref) {
//logger.info(genotypes.size());
for ( Genotype g : genotypes ) {
//logger.info("Genotype: "+g.getBases()+" Ref from genotype: "+g.getReference()+" Ref from method: "+ref);
if ( g.isVariant(ref) ) {
return g.toVariation(ref).getAlternativeBaseForSNP();
}
}
throw new IllegalStateException("List of genotypes did not contain a variant.");
}
}

View File

@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ac80f5a5ab06037f938861effcceb66f"));
Arrays.asList("3b54731a929134410f167630ec434310"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("227f38b2f322780187eacd0c98ace3e6"));
Arrays.asList("c713d6d6cf922eab4150737f51f13abc"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("cda6af61023e67ce8cc8d57cc6cd155b"));
Arrays.asList("0feb1dd8aae945ce5f05c1156acadd3c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("1ed34e09afbc25e034597a97c7861077"));
Arrays.asList("b15b965af6d204cbb2926f59f68d987c"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}