This is an annotation adapted from one that Mark Daly suggested some time ago. Right now it calculates:
- For all reference bases, the proportion of their second best bases that support the SNP - the proportion of non-reference bases that support the SNP and reports the difference between the two. Initially I was taking depth into account as well, but that did not appear to work as nicely as I'd like (even at 20,000x depth, if 95% of the non-reference bases are C, and 98% of the reference second-best-bases are C, then we would want to be suspicious of it; but perhaps slightly less so than if the depth were only 20...) Anyway it's now available. I'm not sure how useful it will be, but I spawned the FHS annotation jobs again, so we'll see. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2109 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
98f921fe24
commit
59416ae06a
|
|
@ -0,0 +1,134 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Nov 21, 2009
|
||||||
|
* Time: 6:08:15 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class PrimaryBaseSecondaryBaseSymmetry implements VariantAnnotation{
|
||||||
|
|
||||||
|
private static boolean USE_ZERO_MAPQ_READS = true;
|
||||||
|
private static boolean USE_ZERO_QUALITY_READS = false;
|
||||||
|
private static String KEY_NAME = "1b2b_symmetry";
|
||||||
|
private static double epsilon = Math.pow(10.0,-12);
|
||||||
|
Logger logger = Logger.getLogger(PrimaryBaseSecondaryBaseSymmetry.class);
|
||||||
|
|
||||||
|
private boolean useConservativeVariance = true;
|
||||||
|
|
||||||
|
public void conservativeVarianceUsage( boolean b ) {
|
||||||
|
useConservativeVariance = b;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean useZeroMappingQualityReads() { return USE_ZERO_MAPQ_READS; }
|
||||||
|
|
||||||
|
public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; }
|
||||||
|
|
||||||
|
public Pair<String,String> annotate(ReferenceContext ref, ReadBackedPileup pileup, List<Genotype> genotypes) {
|
||||||
|
Pair<Integer,Double> refSecondBasePair = getProportionOfReferenceSecondBasesThatSupportAlt(ref, pileup, genotypes);
|
||||||
|
Pair<Integer,Double> nonrefPrimaryBasePair = getProportionOfPrimaryNonrefBasesThatSupportAlt(ref, pileup, genotypes);
|
||||||
|
if ( refSecondBasePair == null || nonrefPrimaryBasePair == null ) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
logger.info("2b="+refSecondBasePair.second+" 1b="+nonrefPrimaryBasePair.second);
|
||||||
|
//double primary_secondary_stat = 1.0/Math.pow( transform(refSecondBasePair) - transform(nonrefPrimaryBasePair), 2);
|
||||||
|
double primary_secondary_stat = refSecondBasePair.second - nonrefPrimaryBasePair.second;
|
||||||
|
String annotation = String.format("%f", primary_secondary_stat);
|
||||||
|
logger.info("Second-base symmetry: annotating with "+annotation);
|
||||||
|
return new Pair<String,String>(KEY_NAME, annotation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double transform( double proportion, int depth ) {
|
||||||
|
proportion = proportion - epsilon;
|
||||||
|
if ( useConservativeVariance ) {
|
||||||
|
return proportion / ( Math.sqrt ( 0.5*(1-0.5) / Math.sqrt(depth) ) );
|
||||||
|
} else {
|
||||||
|
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double transform( Pair<Integer, Double> depth_prop ) {
|
||||||
|
return transform( depth_prop.getSecond(), depth_prop.getFirst() );
|
||||||
|
}
|
||||||
|
|
||||||
|
private Pair<Integer,Double> getProportionOfReferenceSecondBasesThatSupportAlt( ReferenceContext ref, ReadBackedPileup p, List<Genotype> genotypes) {
|
||||||
|
char snp;
|
||||||
|
try {
|
||||||
|
snp = getNonref(genotypes, ref.getBase());
|
||||||
|
} catch ( IllegalStateException e) {
|
||||||
|
logger.info("Caught: IllegalStateException -- "+e.getLocalizedMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
char[] secondaryPileup = p.getSecondaryBasePileup().toCharArray();
|
||||||
|
int depth = p.size();
|
||||||
|
|
||||||
|
int support = 0;
|
||||||
|
for ( char c : secondaryPileup ) {
|
||||||
|
if ( Character.toUpperCase(c) == Character.toUpperCase(snp) ) {
|
||||||
|
support ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double as_prop = ( ( double ) support ) / depth;
|
||||||
|
|
||||||
|
return new Pair<Integer,Double> ( depth, as_prop );
|
||||||
|
}
|
||||||
|
|
||||||
|
private Pair<Integer,Double> getProportionOfPrimaryNonrefBasesThatSupportAlt( ReferenceContext ref, ReadBackedPileup p, List<Genotype> genotypes ) {
|
||||||
|
char snp;
|
||||||
|
try {
|
||||||
|
snp = getNonref(genotypes, ref.getBase());
|
||||||
|
} catch ( IllegalStateException e ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int [] baseCounts = p.getBasePileupAsCounts();
|
||||||
|
int support = -1;
|
||||||
|
int depth = 0;
|
||||||
|
for ( char c : BaseUtils.BASES ) {
|
||||||
|
// ignore ref
|
||||||
|
if ( Character.toUpperCase(c) == Character.toUpperCase(ref.getBase()) ) {
|
||||||
|
} else {
|
||||||
|
// catch our snp
|
||||||
|
if ( Character.toUpperCase(c) == Character.toUpperCase(snp) ) {
|
||||||
|
support = baseCounts[BaseUtils.simpleBaseToBaseIndex(c)];
|
||||||
|
depth = depth + baseCounts[BaseUtils.simpleBaseToBaseIndex(c)];
|
||||||
|
} else {
|
||||||
|
depth = depth + baseCounts[BaseUtils.simpleBaseToBaseIndex(c)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( depth == 0 || support < 0 ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
double as_prop = ( ( double ) support) / depth;
|
||||||
|
|
||||||
|
return new Pair<Integer,Double> ( depth, as_prop );
|
||||||
|
}
|
||||||
|
|
||||||
|
private char getNonref(List<Genotype> genotypes, char ref) {
|
||||||
|
//logger.info(genotypes.size());
|
||||||
|
for ( Genotype g : genotypes ) {
|
||||||
|
//logger.info("Genotype: "+g.getBases()+" Ref from genotype: "+g.getReference()+" Ref from method: "+ref);
|
||||||
|
if ( g.isVariant(ref) ) {
|
||||||
|
return g.toVariation(ref).getAlternativeBaseForSNP();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalStateException("List of genotypes did not contain a variant.");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue