diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java new file mode 100755 index 000000000..07b17bdc2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -0,0 +1,97 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Nov 16, 2009 + * Time: 11:25:51 AM + * To change this template use File | Settings | File Templates. + */ +public class SecondBaseSkew implements VariantAnnotation{ + private static double epsilon = Math.pow(10.0,-12); + private static boolean USE_ZERO_QUALITY_READS = true; + private static String KEY_NAME = "2b_Chi"; + private static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3}; + private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO; + + + public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; } + + public Pair annotate(ReferenceContext ref, ReadBackedPileup pileup, List genotypes) { + double chi_square; + Pair depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes); + if ( depthProp == null ) { + return null; + } else { + double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst()); + double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst()); + // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); + // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); + chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); + } + + return new Pair(KEY_NAME,String.format("%f", chi_square)); + } + + private double transform( double proportion, int depth ) { + proportion = proportion - epsilon; + return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) ); + } + + private Pair getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List genotypes) { + char snp; + try { + snp = getNonref(genotypes, p.getRef()); + } catch ( IllegalStateException e ) { + // tri-allelic site + // System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")"); + return null; + } + int variantDepth = 0; + int variantsWithRefSecondBase = 0; + char[] primaryPileup = p.getBases().toCharArray(); + String secondBasePileup = p.getSecondaryBasePileup(); + + if ( secondBasePileup == null ) { + // System.out.println("Warning: Second base pileup is null at "+p.getLocation().toString()); + return null; + } else { + char [] secondaryPileup = secondBasePileup.toCharArray(); + //System.out.printf("primary=%d secondary=%d locus=%s%n", primaryPileup.length, secondaryPileup.length, p.getLocation().toString()); + + for ( int i = 0; i < primaryPileup.length; i ++ ) { + if ( BaseUtils.basesAreEqual((byte) primaryPileup[i], (byte) snp) ) { + variantDepth++; + if ( BaseUtils.basesAreEqual((byte) secondaryPileup[i], (byte) p.getRef()) ) { + variantsWithRefSecondBase++; + } + } + } + + + double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth ); + + return new Pair(variantDepth+1, biasedProportion); + } + + } + + private char getNonref(List genotypes, char ref) { + for ( Genotype g : genotypes ) { + if ( g.isVariant(ref) ) { + return g.toVariation(ref).getAlternativeBaseForSNP(); + } + } + + throw new StingException("List of genotypes did not contain a variant."); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 3204fe53d..0e7f7b81d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -201,8 +201,10 @@ public class VariantAnnotator extends RodWalker { for ( VariantAnnotation annotator : annotations) { Pair annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), genotypes); - if ( annot != null ) + if ( annot != null ) { + // System.out.println("Annotating: First="+annot.getFirst()+" Second="+annot.getSecond()); results.put(annot.first, annot.second); + } } return results; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java new file mode 100755 index 000000000..b23b82c04 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Nov 17, 2009 + * Time: 6:58:09 PM + * To change this template use File | Settings | File Templates. + */ +public class SecondBaseSkewIntegrationTest extends WalkerTest { + + @Test + public void secondBaseSkewTest() { + for ( int test = 1; test <= 5; test ++ ) { + String bamFilePath = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.recal.annotation_subset.bam"; + String callFile = VariantAnnotatorIntegrationTest.validationDataPath()+VariantAnnotatorIntegrationTest.secondBaseTestFile(test)+".a2b.ssg1b.geli.calls"; + String args = VariantAnnotatorIntegrationTest.secondBaseTestString()+" -I "+bamFilePath+" -B variant,Variants,"+callFile+" "+VariantAnnotatorIntegrationTest.secondBaseTestInterval(test)+" -sample variant"; + WalkerTestSpec spec = new WalkerTestSpec(args,1, Arrays.asList(VariantAnnotatorIntegrationTest.secondBaseTestmd5(test))); + executeTest("Second base skew annotator, test number "+Integer.toString(test), spec); + } + } + +} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 559004525..1c40eb13a 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -1,15 +1,60 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.StingException; import org.junit.Test; import java.util.Arrays; public class VariantAnnotatorIntegrationTest extends WalkerTest { + + public static String secondBaseTestString() { + return "-T VariantAnnotator -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -vcf %s -A SecondBaseSkew"; + } + + public static String validationDataPath() { + return "/humgen/gsa-scr1/GATK_Data/Validation_Data/"; + } + + public static String secondBaseTestFile( int testNo ) { + switch ( testNo ) { + case 1: return "NA12891"; + case 2: return "NA12842"; + case 3: return "NA18510"; + case 4: return "NA18960"; + case 5: return "NA20762"; + default: throw new StingException("Impossible test has been run: secondbasetest number "+ testNo); + } + } + + public static String secondBaseTestInterval ( int testNo ) { + switch ( testNo ) { + case 1: return "-L chr1:14,000,000-18,000,000"; + case 2: return "-L chr2:10,000,000-18,000,000"; + case 3: return "-L chr1:10,000,000-20,000,000"; + case 4: return "-L chr1:10,000,000-15,000,000 -L chr2:5,000,000-12,000,000 -L chr3:4,000,000-14,000,000"; + case 5: return "-L chr20 -L chr21 -L chr22"; + default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo); + } + } + + public static String secondBaseTestmd5( int testNo ) { + switch ( testNo ) { + case 1: return "48adec9c7fa6d3f5f5647b8eababd4e3"; + case 2: return "2b62ad3444bd16a9809ed2df5d6c66ca"; + case 3: return "90a24b6103c3d7b40ab41204be33005d"; + case 4: return "cc62790f15fb070a59b04cdb94fdd0e9"; + case 5: return "10cffb288bcd0bc992a2603ff0a0e02e"; + default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo); + } + } + public static String baseTestString() { return "-T VariantAnnotator -R /broad/1KG/reference/human_b36_both.fasta -vcf %s"; } + + @Test public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( @@ -73,4 +118,5 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { Arrays.asList("1ed34e09afbc25e034597a97c7861077")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } + } \ No newline at end of file