New annotation: ResidualQuality
Computes a metric for how much error is left that isn't explained by ref or snp bases. This is the sum of Q scores, weighted by the proportion of non-ref non-snp bases to non-snp bases. Reported in Log space. Update to the integration test so bamboo doesn't look as though someone murdered it with a spork git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2124 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
70059a0fc9
commit
23983b2fd8
|
|
@ -0,0 +1,88 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Nov 23, 2009
|
||||
* Time: 1:39:39 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ResidualQuality implements VariantAnnotation{
|
||||
private static double EPSILON = Math.pow(10,-12);
|
||||
public static String KEY_NAME = "ResidualQuality";
|
||||
|
||||
public boolean useZeroQualityReads() { return true; } // for robustness
|
||||
|
||||
public Pair<String,String> annotate( ReferenceContext ref, ReadBackedPileup p, List<Genotype> genotypes) {
|
||||
|
||||
Character snp = getSNPChar(ref, genotypes);
|
||||
if ( snp == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Double logResidQual = getLogResidualQuality(p,ref.getBase(),snp);
|
||||
|
||||
if ( logResidQual == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new Pair<String,String>(KEY_NAME, String.format("%f", logResidQual ));
|
||||
}
|
||||
|
||||
private Double getLogResidualQuality( ReadBackedPileup p, char ref, char snp ) {
|
||||
byte[] pbp = p.getBases();
|
||||
byte[] quals = p.getQuals();
|
||||
if ( pbp == null || quals == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int nSNP = 0;
|
||||
int nRef = 0;
|
||||
int pileupSize = pbp.length;
|
||||
int sumOfQuals = 0;
|
||||
for ( int i = 0; i < pileupSize; i ++ ) {
|
||||
if ( BaseUtils.basesAreEqual( pbp[i], (byte) ref ) ) {
|
||||
// ref site
|
||||
nRef ++;
|
||||
} else if ( BaseUtils.basesAreEqual ( pbp[i], ( byte ) snp ) ) {
|
||||
// snp site
|
||||
nSNP++;
|
||||
} else {
|
||||
// non-ref non-snp site, increase quality
|
||||
sumOfQuals += quals[i];
|
||||
}
|
||||
}
|
||||
|
||||
// want to return sum of qualities times the proportion of non-SNP bases they account for (includes ref bases)
|
||||
// taking the log gives log(SQ) + log(non-ref non-snp bases) - log(non-snp bases)
|
||||
|
||||
return Math.log(sumOfQuals + EPSILON) + Math.log(pileupSize-nSNP-nRef+EPSILON) - Math.log(pileupSize-nSNP + EPSILON);
|
||||
}
|
||||
|
||||
private Character getSNPChar( ReferenceContext ref, List<Genotype> genotypes ) {
|
||||
try {
|
||||
return getNonref( genotypes, ref.getBase() );
|
||||
} catch ( IllegalStateException e ) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private char getNonref(List<Genotype> genotypes, char ref) {
|
||||
//logger.info(genotypes.size());
|
||||
for ( Genotype g : genotypes ) {
|
||||
//logger.info("Genotype: "+g.getBases()+" Ref from genotype: "+g.getReference()+" Ref from method: "+ref);
|
||||
if ( g.isVariant(ref) ) {
|
||||
return g.toVariation(ref).getAlternativeBaseForSNP();
|
||||
}
|
||||
}
|
||||
throw new IllegalStateException("List of genotypes did not contain a variant.");
|
||||
}
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ac80f5a5ab06037f938861effcceb66f"));
|
||||
Arrays.asList("3b54731a929134410f167630ec434310"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("227f38b2f322780187eacd0c98ace3e6"));
|
||||
Arrays.asList("c713d6d6cf922eab4150737f51f13abc"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("cda6af61023e67ce8cc8d57cc6cd155b"));
|
||||
Arrays.asList("0feb1dd8aae945ce5f05c1156acadd3c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -all -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("1ed34e09afbc25e034597a97c7861077"));
|
||||
Arrays.asList("b15b965af6d204cbb2926f59f68d987c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue