Second base skew annotations and integration tests. Nothing need be given except -A SecondBaseSkew; the statistic it annotates calls with is a chi-square statistic given by the deviation of the observed proportion of reference second-best-bases from the expected 1/3. Future additions may be to ask that the deviation be instead from a given transition table.

A big note for all users: All IllegalStateExceptions from the variation ROD (e.g. the RodGeliText) are dealt with SILENTLY. I understand this isn't optimal, but I'd rather simply not annotate a non-bi-allelic site than fail completely (there are quite a few such sites even on the regions over which the integration test has been written).




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2064 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-11-18 00:11:13 +00:00
parent 42a0bbaf46
commit 539f6f15e5
4 changed files with 174 additions and 1 deletions

View File

@ -0,0 +1,97 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Nov 16, 2009
* Time: 11:25:51 AM
* To change this template use File | Settings | File Templates.
*/
public class SecondBaseSkew implements VariantAnnotation{
private static double epsilon = Math.pow(10.0,-12);
private static boolean USE_ZERO_QUALITY_READS = true;
private static String KEY_NAME = "2b_Chi";
private static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3};
private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO;
public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; }
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
double chi_square;
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes);
if ( depthProp == null ) {
return null;
} else {
double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst());
double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst());
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
}
return new Pair<String,String>(KEY_NAME,String.format("%f", chi_square));
}
private double transform( double proportion, int depth ) {
proportion = proportion - epsilon;
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
}
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List<Genotype> genotypes) {
char snp;
try {
snp = getNonref(genotypes, p.getRef());
} catch ( IllegalStateException e ) {
// tri-allelic site
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
return null;
}
int variantDepth = 0;
int variantsWithRefSecondBase = 0;
char[] primaryPileup = p.getBases().toCharArray();
String secondBasePileup = p.getSecondaryBasePileup();
if ( secondBasePileup == null ) {
// System.out.println("Warning: Second base pileup is null at "+p.getLocation().toString());
return null;
} else {
char [] secondaryPileup = secondBasePileup.toCharArray();
//System.out.printf("primary=%d secondary=%d locus=%s%n", primaryPileup.length, secondaryPileup.length, p.getLocation().toString());
for ( int i = 0; i < primaryPileup.length; i ++ ) {
if ( BaseUtils.basesAreEqual((byte) primaryPileup[i], (byte) snp) ) {
variantDepth++;
if ( BaseUtils.basesAreEqual((byte) secondaryPileup[i], (byte) p.getRef()) ) {
variantsWithRefSecondBase++;
}
}
}
double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth );
return new Pair<Integer,Double>(variantDepth+1, biasedProportion);
}
}
private char getNonref(List<Genotype> genotypes, char ref) {
for ( Genotype g : genotypes ) {
if ( g.isVariant(ref) ) {
return g.toVariation(ref).getAlternativeBaseForSNP();
}
}
throw new StingException("List of genotypes did not contain a variant.");
}
}

View File

@ -201,8 +201,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
for ( VariantAnnotation annotator : annotations) {
Pair<String, String> annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), genotypes);
if ( annot != null )
if ( annot != null ) {
// System.out.println("Annotating: First="+annot.getFirst()+" Second="+annot.getSecond());
results.put(annot.first, annot.second);
}
}
return results;

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Nov 17, 2009
* Time: 6:58:09 PM
* To change this template use File | Settings | File Templates.
*/
public class SecondBaseSkewIntegrationTest extends WalkerTest {
@Test
public void secondBaseSkewTest() {
for ( int test = 1; test <= 5; test ++ ) {
String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam";
String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls";
String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";
WalkerTestSpec spec = new WalkerTestSpec(args,1, Arrays.asList(VariantAnnotatorIntegrationTest.secondBaseTestmd5(test)));
executeTest("Second base skew annotator, test number "+Integer.toString(test), spec);
}
}
}

View File

@ -1,15 +1,60 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.StingException;
import org.junit.Test;
import java.util.Arrays;
public class VariantAnnotatorIntegrationTest extends WalkerTest {
public static String secondBaseTestString() {
return "-T VariantAnnotator -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -vcf %s -A SecondBaseSkew";
}
public static String validationDataPath() {
return "/humgen/gsa-scr1/GATK_Data/Validation_Data/";
}
public static String secondBaseTestFile( int testNo ) {
switch ( testNo ) {
case 1: return "NA12891";
case 2: return "NA12842";
case 3: return "NA18510";
case 4: return "NA18960";
case 5: return "NA20762";
default: throw new StingException("Impossible test has been run: secondbasetest number "+ testNo);
}
}
public static String secondBaseTestInterval ( int testNo ) {
switch ( testNo ) {
case 1: return "-L chr1:14,000,000-18,000,000";
case 2: return "-L chr2:10,000,000-18,000,000";
case 3: return "-L chr1:10,000,000-20,000,000";
case 4: return "-L chr1:10,000,000-15,000,000 -L chr2:5,000,000-12,000,000 -L chr3:4,000,000-14,000,000";
case 5: return "-L chr20 -L chr21 -L chr22";
default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo);
}
}
public static String secondBaseTestmd5( int testNo ) {
switch ( testNo ) {
case 1: return "48adec9c7fa6d3f5f5647b8eababd4e3";
case 2: return "2b62ad3444bd16a9809ed2df5d6c66ca";
case 3: return "90a24b6103c3d7b40ab41204be33005d";
case 4: return "cc62790f15fb070a59b04cdb94fdd0e9";
case 5: return "10cffb288bcd0bc992a2603ff0a0e02e";
default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo);
}
}
public static String baseTestString() {
return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -vcf %s";
}
@Test
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -73,4 +118,5 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
Arrays.asList("1ed34e09afbc25e034597a97c7861077"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
}