diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java new file mode 100644 index 000000000..dbda3c2c6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Nov 23, 2009 + * Time: 1:39:39 PM + * To change this template use File | Settings | File Templates. + */ +public class ResidualQuality implements VariantAnnotation{ + private static double EPSILON = Math.pow(10,-12); + public static String KEY_NAME = "ResidualQuality"; + + public boolean useZeroQualityReads() { return true; } // for robustness + + public Pair annotate( ReferenceContext ref, ReadBackedPileup p, List genotypes) { + + Character snp = getSNPChar(ref, genotypes); + if ( snp == null ) { + return null; + } + + Double logResidQual = getLogResidualQuality(p,ref.getBase(),snp); + + if ( logResidQual == null ) { + return null; + } + + return new Pair(KEY_NAME, String.format("%f", logResidQual )); + } + + private Double getLogResidualQuality( ReadBackedPileup p, char ref, char snp ) { + byte[] pbp = p.getBases(); + byte[] quals = p.getQuals(); + if ( pbp == null || quals == null ) { + return null; + } + + int nSNP = 0; + int nRef = 0; + int pileupSize = pbp.length; + int sumOfQuals = 0; + for ( int i = 0; i < pileupSize; i ++ ) { + if ( BaseUtils.basesAreEqual( pbp[i], (byte) ref ) ) { + // ref site + nRef ++; + } else if ( BaseUtils.basesAreEqual ( pbp[i], ( byte ) snp ) ) { + // snp site + nSNP++; + } else { + // non-ref non-snp site, increase quality + sumOfQuals += quals[i]; + } + } + + // want to return sum of qualities times the proportion of non-SNP bases they account for (includes ref bases) + // taking the log gives log(SQ) + log(non-ref non-snp bases) - log(non-snp bases) + + return Math.log(sumOfQuals + EPSILON) + Math.log(pileupSize-nSNP-nRef+EPSILON) - Math.log(pileupSize-nSNP + EPSILON); + } + + private Character getSNPChar( ReferenceContext ref, List genotypes ) { + try { + return getNonref( genotypes, ref.getBase() ); + } catch ( IllegalStateException e ) { + return null; + } + } + + private char getNonref(List genotypes, char ref) { + //logger.info(genotypes.size()); + for ( Genotype g : genotypes ) { + //logger.info("Genotype: "+g.getBases()+" Ref from genotype: "+g.getReference()+" Ref from method: "+ref); + if ( g.isVariant(ref) ) { + return g.toVariation(ref).getAlternativeBaseForSNP(); + } + } + throw new IllegalStateException("List of genotypes did not contain a variant."); + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 395566ea8..46fb9cccb 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("ac80f5a5ab06037f938861effcceb66f")); + Arrays.asList("3b54731a929134410f167630ec434310")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("227f38b2f322780187eacd0c98ace3e6")); + Arrays.asList("c713d6d6cf922eab4150737f51f13abc")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("cda6af61023e67ce8cc8d57cc6cd155b")); + Arrays.asList("0feb1dd8aae945ce5f05c1156acadd3c")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("1ed34e09afbc25e034597a97c7861077")); + Arrays.asList("b15b965af6d204cbb2926f59f68d987c")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); }