Added --
Second base skew annotations and integration tests. Nothing need be given except -A SecondBaseSkew; the statistic it annotates calls with is a chi-square statistic given by the deviation of the observed proportion of reference second-best-bases from the expected 1/3. Future additions may be to ask that the deviation be instead from a given transition table. A big note for all users: All IllegalStateExceptions from the variation ROD (e.g. the RodGeliText) are dealt with SILENTLY. I understand this isn't optimal, but I'd rather simply not annotate a non-bi-allelic site than fail completely (there are quite a few such sites even on the regions over which the integration test has been written). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2064 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
42a0bbaf46
commit
539f6f15e5
|
|
@ -0,0 +1,97 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Nov 16, 2009
|
||||
* Time: 11:25:51 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SecondBaseSkew implements VariantAnnotation{
|
||||
private static double epsilon = Math.pow(10.0,-12);
|
||||
private static boolean USE_ZERO_QUALITY_READS = true;
|
||||
private static String KEY_NAME = "2b_Chi";
|
||||
private static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3};
|
||||
private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO;
|
||||
|
||||
|
||||
public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; }
|
||||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||
double chi_square;
|
||||
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes);
|
||||
if ( depthProp == null ) {
|
||||
return null;
|
||||
} else {
|
||||
double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst());
|
||||
double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst());
|
||||
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
|
||||
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
|
||||
chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
|
||||
}
|
||||
|
||||
return new Pair<String,String>(KEY_NAME,String.format("%f", chi_square));
|
||||
}
|
||||
|
||||
private double transform( double proportion, int depth ) {
|
||||
proportion = proportion - epsilon;
|
||||
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
||||
}
|
||||
|
||||
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List<Genotype> genotypes) {
|
||||
char snp;
|
||||
try {
|
||||
snp = getNonref(genotypes, p.getRef());
|
||||
} catch ( IllegalStateException e ) {
|
||||
// tri-allelic site
|
||||
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
|
||||
return null;
|
||||
}
|
||||
int variantDepth = 0;
|
||||
int variantsWithRefSecondBase = 0;
|
||||
char[] primaryPileup = p.getBases().toCharArray();
|
||||
String secondBasePileup = p.getSecondaryBasePileup();
|
||||
|
||||
if ( secondBasePileup == null ) {
|
||||
// System.out.println("Warning: Second base pileup is null at "+p.getLocation().toString());
|
||||
return null;
|
||||
} else {
|
||||
char [] secondaryPileup = secondBasePileup.toCharArray();
|
||||
//System.out.printf("primary=%d secondary=%d locus=%s%n", primaryPileup.length, secondaryPileup.length, p.getLocation().toString());
|
||||
|
||||
for ( int i = 0; i < primaryPileup.length; i ++ ) {
|
||||
if ( BaseUtils.basesAreEqual((byte) primaryPileup[i], (byte) snp) ) {
|
||||
variantDepth++;
|
||||
if ( BaseUtils.basesAreEqual((byte) secondaryPileup[i], (byte) p.getRef()) ) {
|
||||
variantsWithRefSecondBase++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth );
|
||||
|
||||
return new Pair<Integer,Double>(variantDepth+1, biasedProportion);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private char getNonref(List<Genotype> genotypes, char ref) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.isVariant(ref) ) {
|
||||
return g.toVariation(ref).getAlternativeBaseForSNP();
|
||||
}
|
||||
}
|
||||
|
||||
throw new StingException("List of genotypes did not contain a variant.");
|
||||
}
|
||||
}
|
||||
|
|
@ -201,8 +201,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
|
||||
for ( VariantAnnotation annotator : annotations) {
|
||||
Pair<String, String> annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), genotypes);
|
||||
if ( annot != null )
|
||||
if ( annot != null ) {
|
||||
// System.out.println("Annotating: First="+annot.getFirst()+" Second="+annot.getSecond());
|
||||
results.put(annot.first, annot.second);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Nov 17, 2009
|
||||
* Time: 6:58:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SecondBaseSkewIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void secondBaseSkewTest() {
|
||||
for ( int test = 1; test <= 5; test ++ ) {
|
||||
String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam";
|
||||
String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls";
|
||||
String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(args,1, Arrays.asList(VariantAnnotatorIntegrationTest.secondBaseTestmd5(test)));
|
||||
executeTest("Second base skew annotator, test number "+Integer.toString(test), spec);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,15 +1,60 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||
|
||||
public static String secondBaseTestString() {
|
||||
return "-T VariantAnnotator -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -vcf %s -A SecondBaseSkew";
|
||||
}
|
||||
|
||||
public static String validationDataPath() {
|
||||
return "/humgen/gsa-scr1/GATK_Data/Validation_Data/";
|
||||
}
|
||||
|
||||
public static String secondBaseTestFile( int testNo ) {
|
||||
switch ( testNo ) {
|
||||
case 1: return "NA12891";
|
||||
case 2: return "NA12842";
|
||||
case 3: return "NA18510";
|
||||
case 4: return "NA18960";
|
||||
case 5: return "NA20762";
|
||||
default: throw new StingException("Impossible test has been run: secondbasetest number "+ testNo);
|
||||
}
|
||||
}
|
||||
|
||||
public static String secondBaseTestInterval ( int testNo ) {
|
||||
switch ( testNo ) {
|
||||
case 1: return "-L chr1:14,000,000-18,000,000";
|
||||
case 2: return "-L chr2:10,000,000-18,000,000";
|
||||
case 3: return "-L chr1:10,000,000-20,000,000";
|
||||
case 4: return "-L chr1:10,000,000-15,000,000 -L chr2:5,000,000-12,000,000 -L chr3:4,000,000-14,000,000";
|
||||
case 5: return "-L chr20 -L chr21 -L chr22";
|
||||
default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo);
|
||||
}
|
||||
}
|
||||
|
||||
public static String secondBaseTestmd5( int testNo ) {
|
||||
switch ( testNo ) {
|
||||
case 1: return "48adec9c7fa6d3f5f5647b8eababd4e3";
|
||||
case 2: return "2b62ad3444bd16a9809ed2df5d6c66ca";
|
||||
case 3: return "90a24b6103c3d7b40ab41204be33005d";
|
||||
case 4: return "cc62790f15fb070a59b04cdb94fdd0e9";
|
||||
case 5: return "10cffb288bcd0bc992a2603ff0a0e02e";
|
||||
default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo);
|
||||
}
|
||||
}
|
||||
|
||||
public static String baseTestString() {
|
||||
return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -vcf %s";
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -73,4 +118,5 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("1ed34e09afbc25e034597a97c7861077"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue