From e6e8a20a1e402eefe6e59c2ba76e2aab1562cb75 Mon Sep 17 00:00:00 2001 From: delangel Date: Wed, 4 Aug 2010 15:23:08 +0000 Subject: [PATCH] 1) Fix MyHaplotypeScore to ignore 454 reads, since all those pathological non-existing indels make some sites' score blow up. If a site is only covered by 454 reads, we (hopefully) detect this graciously and just emit a score of 0.0 for the site. 2) New annotation SByDepth = log10(-StrandBias/Depth) (non-standard annotation, key name = "SBD"). If StrandBias/Depth happens to be positive (very rare but can happen), annotation gets value=-1000. 3) Abstracted out new class AnnotationByDepth so that QD and SBD can share code. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3930 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/AnnotationByDepth.java | 30 ++++++++++ .../walkers/annotator/MyHaplotypeScore.java | 26 +++++++-- .../gatk/walkers/annotator/QualByDepth.java | 45 +-------------- .../gatk/walkers/annotator/SBByDepth.java | 57 +++++++++++++++++++ 4 files changed, 111 insertions(+), 47 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java new file mode 100755 index 000000000..7ab8a0819 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import java.util.Map; + + + +public abstract class AnnotationByDepth implements InfoFieldAnnotation { + + + protected int annotationByVariantDepth(final Map genotypes, Map stratifiedContexts) { + int depth = 0; + for ( Map.Entry genotype : genotypes.entrySet() ) { + + // we care only about variant calls + if ( genotype.getValue().isHomRef() ) + continue; + + StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); + if ( context != null ) + depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); + } + + return depth; + } + + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java index 572923978..326c9cd2a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; +import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; @@ -38,6 +39,8 @@ import org.broadinstitute.sting.utils.pileup.*; import java.util.*; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; + public class MyHaplotypeScore implements InfoFieldAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; @@ -56,7 +59,10 @@ public class MyHaplotypeScore implements InfoFieldAnnotation { List haplotypes = computeHaplotypes(context.getBasePileup(), contextSize); // calculate the haplotype scores by walking over all reads and comparing them to the haplotypes - double score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize); + double score = 0.0; + + if (haplotypes != null) + score = scoreReadsAgainstHaplotypes(haplotypes, context.getBasePileup(), contextSize); // return the score Map map = new HashMap(); @@ -85,8 +91,11 @@ public class MyHaplotypeScore implements InfoFieldAnnotation { for ( ExtendedPileupElement p : pileup.extendedForeachIterator() ) { + if (ReadUtils.is454Read(p.getRead())) + continue; Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize); + haplotypeQueue.add(haplotypeFromRead); //haplotypeList.add(haplotypeFromRead); } @@ -141,12 +150,16 @@ public class MyHaplotypeScore implements InfoFieldAnnotation { secondBestIdxVal = qualSum; } } - Haplotype haplotypeR = haplotypeList.get(bestIdx); - Haplotype haplotypeA = haplotypeList.get(secondBestIdx); + if (haplotypeList.size() > 0) { + Haplotype haplotypeR = haplotypeList.get(bestIdx); + Haplotype haplotypeA = haplotypeList.get(secondBestIdx); - // Temp hack to match old implementation's scaling, TBD better behavior + // Temp hack to match old implementation's scaling, TBD better behavior - return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize)); + return Arrays.asList(new Haplotype(haplotypeR.bases, 60), new Haplotype(haplotypeA.bases, contextSize)); + } + else + return null; } private Haplotype getHaplotypeFromRead(ExtendedPileupElement p, int contextSize) { @@ -233,6 +246,9 @@ public class MyHaplotypeScore implements InfoFieldAnnotation { SAMRecord read = p.getRead(); int readOffsetFromPileup = p.getOffset(); + if (ReadUtils.is454Read(read)) + continue; + if ( DEBUG ) System.out.printf("--------------------------------------------- Read %s%n", read.getReadName()); double m = 10000000; for ( int i = 0; i < haplotypes.size(); i++ ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 898309e92..fdcd24fe9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import java.util.Map; @@ -15,7 +14,7 @@ import java.util.List; import java.util.Arrays; -public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { +public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) @@ -26,7 +25,7 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { return null; //double QbyD = genotypeQualByDepth(genotypes, stratifiedContexts); - int qDepth = variationQualByDepth(genotypes, stratifiedContexts); + int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts); if ( qDepth == 0 ) return null; @@ -40,42 +39,4 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); } - private int variationQualByDepth(final Map genotypes, Map stratifiedContexts) { - int depth = 0; - for ( Map.Entry genotype : genotypes.entrySet() ) { - - // we care only about variant calls - if ( genotype.getValue().isHomRef() ) - continue; - - StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); - if ( context != null ) - depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); - } - - return depth; - } - -// private double genotypeQualByDepth(final Map genotypes, Map stratifiedContexts) { -// ArrayList qualsByDepth = new ArrayList(); -// for ( Map.Entry genotype : genotypes.entrySet() ) { -// -// // we care only about variant calls -// if ( genotype.getValue().isHomRef() ) -// continue; -// -// StratifiedAlignmentContext context = stratifiedContexts.get(genotype.getKey()); -// if ( context == null ) -// continue; -// -// qualsByDepth.add(genotype.getValue().getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); -// } -// -// double mean = 0.0; -// for ( Double value : qualsByDepth ) -// mean += value; -// mean /= qualsByDepth.size(); -// -// return mean; -// } -} \ No newline at end of file + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java new file mode 100755 index 000000000..8387825e6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -0,0 +1,57 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broad.tribble.vcf.VCFConstants; +import org.broad.tribble.vcf.VCFHeaderLineType; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + + +public class SBByDepth extends AnnotationByDepth { + + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( stratifiedContexts.size() == 0 ) + return null; + + if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY)) + return null; + + double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY)); + + final Map genotypes = vc.getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) + return null; + + int sDepth = annotationByVariantDepth(genotypes, stratifiedContexts); + if ( sDepth == 0 ) + return null; + + + + double SbyD = (-sBias / (double)sDepth); + if (SbyD > 0) + SbyD = Math.log10(SbyD); + else + SbyD = -1000; + + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%.2f", SbyD)); + return map; + } + + public List getKeyNames() { return Arrays.asList("SBD"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Strand Bias by Depth")); } + + + +} \ No newline at end of file