1) Fix MyHaplotypeScore to ignore 454 reads, since all those pathological non-existing indels make some sites' score blow up. If a site is only covered by 454 reads, we (hopefully) detect this graciously and just emit a score of 0.0 for the site.
2) New annotation SByDepth = log10(-StrandBias/Depth) (non-standard annotation, key name = "SBD"). If StrandBias/Depth happens to be positive (very rare but can happen), annotation gets value=-1000. 3) Abstracted out new class AnnotationByDepth so that QD and SBD can share code. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3930 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bf60ed0b25
commit
e6e8a20a1e
|
|
@ -0,0 +1,30 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
|
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||||
|
int depth = 0;
|
||||||
|
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||||
|
|
||||||
|
// we care only about variant calls
|
||||||
|
if ( genotype.getValue().isHomRef() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||||
|
if ( context != null )
|
||||||
|
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
|
||||||
|
}
|
||||||
|
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
@ -38,6 +39,8 @@ import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
public class MyHaplotypeScore implements InfoFieldAnnotation {
|
public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
|
|
@ -56,7 +59,10 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||||
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
List<Haplotype> haplotypes = computeHaplotypes(context.getBasePileup(), contextSize);
|
||||||
|
|
||||||
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
|
||||||
double score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
double score = 0.0;
|
||||||
|
|
||||||
|
if (haplotypes != null)
|
||||||
|
score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize);
|
||||||
|
|
||||||
// return the score
|
// return the score
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
|
@ -85,8 +91,11 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) {
|
||||||
|
if (ReadUtils.is454Read(p.getRead()))
|
||||||
|
continue;
|
||||||
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize);
|
||||||
|
|
||||||
|
|
||||||
haplotypeQueue.add(haplotypeFromRead);
|
haplotypeQueue.add(haplotypeFromRead);
|
||||||
//haplotypeList.add(haplotypeFromRead);
|
//haplotypeList.add(haplotypeFromRead);
|
||||||
}
|
}
|
||||||
|
|
@ -141,12 +150,16 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||||
secondBestIdxVal = qualSum;
|
secondBestIdxVal = qualSum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
if (haplotypeList.size() > 0) {
|
||||||
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
Haplotype haplotypeR = haplotypeList.get(bestIdx);
|
||||||
|
Haplotype haplotypeA = haplotypeList.get(secondBestIdx);
|
||||||
|
|
||||||
// Temp hack to match old implementation's scaling, TBD better behavior
|
// Temp hack to match old implementation's scaling, TBD better behavior
|
||||||
|
|
||||||
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) {
|
||||||
|
|
@ -233,6 +246,9 @@ public class MyHaplotypeScore implements InfoFieldAnnotation {
|
||||||
SAMRecord read = p.getRead();
|
SAMRecord read = p.getRead();
|
||||||
int readOffsetFromPileup = p.getOffset();
|
int readOffsetFromPileup = p.getOffset();
|
||||||
|
|
||||||
|
if (ReadUtils.is454Read(read))
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName());
|
||||||
double m = 10000000;
|
double m = 10000000;
|
||||||
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
for ( int i = 0; i < haplotypes.size(); i++ ) {
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@ -15,7 +14,7 @@ import java.util.List;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
|
|
@ -26,7 +25,7 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
|
//double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts);
|
||||||
int qDepth = variationQualByDepth(genotypes, stratifiedContexts);
|
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||||
if ( qDepth == 0 )
|
if ( qDepth == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -40,42 +39,4 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||||
|
|
||||||
private int variationQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
}
|
||||||
int depth = 0;
|
|
||||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
|
||||||
|
|
||||||
// we care only about variant calls
|
|
||||||
if ( genotype.getValue().isHomRef() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
|
||||||
if ( context != null )
|
|
||||||
depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
|
|
||||||
}
|
|
||||||
|
|
||||||
return depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
// private double genotypeQualByDepth(final Map<String, Genotype> genotypes, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
|
||||||
// ArrayList<Double> qualsByDepth = new ArrayList<Double>();
|
|
||||||
// for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
|
||||||
//
|
|
||||||
// // we care only about variant calls
|
|
||||||
// if ( genotype.getValue().isHomRef() )
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
|
||||||
// if ( context == null )
|
|
||||||
// continue;
|
|
||||||
//
|
|
||||||
// qualsByDepth.add(genotype.getValue().getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// double mean = 0.0;
|
|
||||||
// for ( Double value : qualsByDepth )
|
|
||||||
// mean += value;
|
|
||||||
// mean /= qualsByDepth.size();
|
|
||||||
//
|
|
||||||
// return mean;
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||||
|
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public class SBByDepth extends AnnotationByDepth {
|
||||||
|
|
||||||
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
if ( stratifiedContexts.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
|
||||||
|
|
||||||
|
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||||
|
if ( genotypes == null || genotypes.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
int sDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||||
|
if ( sDepth == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double SbyD = (-sBias / (double)sDepth);
|
||||||
|
if (SbyD > 0)
|
||||||
|
SbyD = Math.log10(SbyD);
|
||||||
|
else
|
||||||
|
SbyD = -1000;
|
||||||
|
|
||||||
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
map.put(getKeyNames().get(0), String.format("%.2f", SbyD));
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getKeyNames() { return Arrays.asList("SBD"); }
|
||||||
|
|
||||||
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Strand Bias by Depth")); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue