1) Fix MyHaplotypeScore to ignore 454 reads, since all those pathological non-existing indels make some sites' score blow up. If a site is only covered by 454 reads, we (hopefully) detect this graciously and just emit a score of 0.0 for the site.
2) New annotation SByDepth = log10(-StrandBias/Depth) (non-standard annotation, key name = "SBD"). If StrandBias/Depth happens to be positive (very rare but can happen), annotation gets value=-1000. 3) Abstracted out new class AnnotationByDepth so that QD and SBD can share code. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3930 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bf60ed0b25
commit
e6e8a20a1e
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
|
||||
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
|
||||
|
||||
|
||||
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
|
||||
// we care only about variant calls
|
||||
if ( genotype.getValue().isHomRef() )
|
||||
continue;
|
||||
|
||||
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
if ( context != null )
|
||||
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
|
||||
}
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -38,6 +39,8 @@ import org.broadinstitute.sting.utils.pileup.*;
|
|||
|
||||
import java.util.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||
|
|
@ -56,7 +59,10 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|||
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
||||
|
||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||
double score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
||||
double score = 0.0;
|
||||
|
||||
if (haplotypes != null)
|
||||
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
||||
|
||||
// return the score
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
|
@ -85,8 +91,11 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|||
|
||||
|
||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||
if (ReadUtils.is454Read(p.getRead()))
|
||||
continue;
|
||||
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
||||
|
||||
|
||||
haplotypeQueue.add(haplotypeFromRead);
|
||||
//haplotypeList.add(haplotypeFromRead);
|
||||
}
|
||||
|
|
@ -141,12 +150,16 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|||
secondBestIdxVal = qualSum;
|
||||
}
|
||||
}
|
||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||
if (haplotypeList.size() > 0) {
|
||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||
|
||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||
|
||||
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
||||
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
||||
}
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
||||
|
|
@ -233,6 +246,9 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
|||
SAMRecord read = p.getRead();
|
||||
int readOffsetFromPileup = p.getOffset();
|
||||
|
||||
if (ReadUtils.is454Read(read))
|
||||
continue;
|
||||
|
||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
||||
double m = 10000000;
|
||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
||||
import java.util.Map;
|
||||
|
|
@ -15,7 +14,7 @@ import java.util.List;
|
|||
import java.util.Arrays;
|
||||
|
||||
|
||||
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
@ -26,7 +25,7 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
|||
return null;
|
||||
|
||||
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
|
||||
int qDepth = variationQualByDepth(genotypes, stratifiedContexts);
|
||||
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
if ( qDepth == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -40,42 +39,4 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||
|
||||
private int variationQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
|
||||
// we care only about variant calls
|
||||
if ( genotype.getValue().isHomRef() )
|
||||
continue;
|
||||
|
||||
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
if ( context != null )
|
||||
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
|
||||
}
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
// private double genotypeQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||
// ArrayList<Double> qualsByDepth = new ArrayList<Double>();
|
||||
// for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
//
|
||||
// // we care only about variant calls
|
||||
// if ( genotype.getValue().isHomRef() )
|
||||
// continue;
|
||||
//
|
||||
// StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
// if ( context == null )
|
||||
// continue;
|
||||
//
|
||||
// qualsByDepth.add(genotype.getValue().getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
|
||||
// }
|
||||
//
|
||||
// double mean = 0.0;
|
||||
// for ( Double value : qualsByDepth )
|
||||
// mean += value;
|
||||
// mean /= qualsByDepth.size();
|
||||
//
|
||||
// return mean;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
|
||||
public class SBByDepth extends AnnotationByDepth {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||
return null;
|
||||
|
||||
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int sDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
if ( sDepth == 0 )
|
||||
return null;
|
||||
|
||||
|
||||
|
||||
double SbyD = (-sBias / (double)sDepth);
|
||||
if (SbyD > 0)
|
||||
SbyD = Math.log10(SbyD);
|
||||
else
|
||||
SbyD = -1000;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", SbyD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("SBD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Strand Bias by Depth")); }
|
||||
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue