1) Fix MyHaplotypeScore to ignore 454 reads, since all those pathological non-existing indels make some sites' score blow up. If a site is only covered by 454 reads, we (hopefully) detect this graciously and just emit a score of 0.0 for the site.

2) New annotation SByDepth = log10(-StrandBias/Depth) (non-standard annotation, key name = "SBD"). If StrandBias/Depth happens to be positive (very rare but can happen), annotation gets value=-1000. 
3) Abstracted out new class AnnotationByDepth so that QD and SBD can share code.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3930 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-08-04 15:23:08 +00:00
parent bf60ed0b25
commit e6e8a20a1e
4 changed files with 111 additions and 47 deletions

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import java.util.Map;
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
int depth = 0;
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
// we care only about variant calls
if ( genotype.getValue().isHomRef() )
continue;
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
if ( context != null )
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
}
return depth;
}
}

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*;
@ -38,6 +39,8 @@ import org.broadinstitute.sting.utils.pileup.*;
import java.util.*;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
public class MyHaplotypeScore implements InfoFieldAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
@ -56,7 +59,10 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
double score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
double score = 0.0;
if (haplotypes != null)
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
// return the score
Map<String, Object> map = new HashMap<String, Object>();
@ -85,8 +91,11 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
if (ReadUtils.is454Read(p.getRead()))
continue;
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
haplotypeQueue.add(haplotypeFromRead);
//haplotypeList.add(haplotypeFromRead);
}
@ -141,12 +150,16 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
secondBestIdxVal = qualSum;
}
}
Haplotype haplotypeR = haplotypeList.get(bestIdx);
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
if (haplotypeList.size() > 0) {
Haplotype haplotypeR = haplotypeList.get(bestIdx);
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
// Temp hack to match old implementation's scaling, TBD better behavior
// Temp hack to match old implementation's scaling, TBD better behavior
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
}
else
return null;
}
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
@ -233,6 +246,9 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
SAMRecord read = p.getRead();
int readOffsetFromPileup = p.getOffset();
if (ReadUtils.is454Read(read))
continue;
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
double m = 10000000;
for ( int i = 0; i < haplotypes.size(); i++ ) {

View File

@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import java.util.Map;
@ -15,7 +14,7 @@ import java.util.List;
import java.util.Arrays;
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -26,7 +25,7 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
return null;
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
int qDepth = variationQualByDepth(genotypes, stratifiedContexts);
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
if ( qDepth == 0 )
return null;
@ -40,42 +39,4 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
private int variationQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
int depth = 0;
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
// we care only about variant calls
if ( genotype.getValue().isHomRef() )
continue;
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
if ( context != null )
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
}
return depth;
}
// private double genotypeQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
// ArrayList<Double> qualsByDepth = new ArrayList<Double>();
// for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
//
// // we care only about variant calls
// if ( genotype.getValue().isHomRef() )
// continue;
//
// StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
// if ( context == null )
// continue;
//
// qualsByDepth.add(genotype.getValue().getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
// }
//
// double mean = 0.0;
// for ( Double value : qualsByDepth )
// mean += value;
// mean /= qualsByDepth.size();
//
// return mean;
// }
}
}

View File

@ -0,0 +1,57 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFConstants;
import org.broad.tribble.vcf.VCFHeaderLineType;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
return null;
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
int sDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
if ( sDepth == 0 )
return null;
double SbyD = (-sBias / (double)sDepth);
if (SbyD > 0)
SbyD = Math.log10(SbyD);
else
SbyD = -1000;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", SbyD));
return map;
}
public List<String> getKeyNames() { return Arrays.asList("SBD"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Strand Bias by Depth")); }
}