From 6a8cf1c84a131a2fb8d9a6c9e0b5b9ef16799a67 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 21 Aug 2012 14:35:40 -0400 Subject: [PATCH] Enable and adapt HaplotypeScore and MappingQualityZero as active region annotations now that we have per-read likelihoods passed in to annotations --- .../haplotypecaller/HaplotypeCaller.java | 2 +- .../annotator/DepthPerAlleleBySample.java | 4 +- .../walkers/annotator/HaplotypeScore.java | 52 ++++++++++++++----- .../walkers/annotator/MappingQualityZero.java | 35 ++++++++++++- .../annotator/VariantAnnotatorEngine.java | 8 ++- .../ActiveRegionBasedAnnotation.java | 2 +- 6 files changed, 83 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 01c3f0491..39c7551f0 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -178,7 +178,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem * so annotations will be excluded even if they are explicitly included with the other options. */ @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) - protected List annotationsToExclude = new ArrayList(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"})); + protected List annotationsToExclude = new ArrayList(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"})); /** * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 055656fe2..b3fc67c2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -58,7 +58,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa return; if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty()) - annotateWithLikelihoods(alleleLikelihoodMap, ref.getBase(), vc, gb); + annotateWithLikelihoods(alleleLikelihoodMap, vc, gb); else if ( vc.isSNP() && stratifiedContext != null) annotateWithPileup(stratifiedContext, vc, gb); } @@ -84,7 +84,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa gb.AD(counts); } - private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) { + private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) { final HashMap alleleCounts = new HashMap(); for ( final Allele allele : vc.getAlleles() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index eedb8cbd7..2bf060f12 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -28,6 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; @@ -56,7 +57,7 @@ import java.util.*; * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls. * Note that the Haplotype Score is only calculated for sites with read coverage. */ -public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { +public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; @@ -68,10 +69,19 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final Map stratifiedContexts, final VariantContext vc, final Map stratifiedPerReadAlleleLikelihoodMap) { - if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here + if (vc.isSNP() && stratifiedContexts != null) + return annotatePileup(ref, stratifiedContexts, vc); + else if (stratifiedPerReadAlleleLikelihoodMap != null) + return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc); + else return null; + } - if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed()) + private Map annotatePileup(final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc) { + + if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here return null; final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values()); @@ -92,16 +102,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName()); if (thisContext != null) { final ReadBackedPileup thisPileup = thisContext.getBasePileup(); - if (vc.isSNP()) - scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense - else if (vc.isIndel() || vc.isMixed()) { - if (stratifiedPerReadAlleleLikelihoodMap == null) - return null; - Double d = scoreIndelsAgainstHaplotypes(stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName())); - if (d == null) - return null; - scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense - } + scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense } } } @@ -112,6 +113,31 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot return map; } + private Map annotateWithLikelihoods(final Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc) { + + final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); + for (final Genotype genotype : vc.getGenotypes()) { + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); + if (perReadAlleleLikelihoodMap == null) + continue; + + Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap); + if (d == null) + continue; + scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense + } + + // if (scoreRA.observationCount() == 0) + // return null; + + // annotate the score in the info field + final Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean())); + return map; + + } + private static class HaplotypeComparator implements Comparator, Serializable { public int compare(Haplotype a, Haplotype b) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index c3cb01c23..f8abd59e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; @@ -12,6 +13,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; @@ -23,7 +26,7 @@ import java.util.Map; /** * Total count across all samples of mapping quality zero reads */ -public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { +public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { public Map annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, @@ -31,6 +34,17 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA final Map stratifiedContexts, final VariantContext vc, final Map stratifiedPerReadAlleleLikelihoodMap) { + if (vc.isSNP() && stratifiedContexts != null) + return annotatePileup(ref, stratifiedContexts, vc); + else if (stratifiedPerReadAlleleLikelihoodMap != null) + return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc); + else + return null; + } + + private Map annotatePileup(final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; @@ -48,6 +62,25 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA return map; } + private Map annotateWithLikelihoods(final Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc) { + if (stratifiedPerReadAlleleLikelihoodMap == null) + return null; + + int mq0 = 0; + for ( PerReadAlleleLikelihoodMap likelihoodMap : stratifiedPerReadAlleleLikelihoodMap.values() ) { + for (GATKSAMRecord read : likelihoodMap.getLikelihoodReadMap().keySet()) { + + if (read.getMappingQuality() == 0 ) + mq0++; + } + } + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%d", mq0)); + return map; + } + + public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } public List getDescriptions() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 02e6a8508..a1bd8dcbd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -291,8 +291,12 @@ public class VariantAnnotatorEngine { final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); for ( final Genotype genotype : vc.getGenotypes() ) { - final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); + AlignmentContext context = null; + PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = null; + if (stratifiedContexts != null) + context = stratifiedContexts.get(genotype.getSampleName()); + if (stratifiedPerReadAlleleLikelihoodMap != null) + perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); if ( context == null && perReadAlleleLikelihoodMap == null) { // no likelihoods nor pileup available: just move on to next sample diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java index 7af4baddb..9c5710872 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java @@ -11,7 +11,7 @@ import java.util.Map; // TODO -- make this an abstract class when we move away from InfoFieldAnnotation public interface ActiveRegionBasedAnnotation extends AnnotationType { - // return annotations for the given contexts split by sample and then read likelihoof + // return annotations for the given contexts split by sample and then read likelihood public abstract Map annotate(final Map stratifiedContexts, final VariantContext vc); // return the descriptions used for the VCF INFO meta field