Enable and adapt HaplotypeScore and MappingQualityZero as active region annotations now that we have per-read likelihoods passed in to annotations

This commit is contained in:
Guillermo del Angel 2012-08-21 14:35:40 -04:00
parent d0644b3565
commit 6a8cf1c84a
6 changed files with 83 additions and 20 deletions

View File

@ -178,7 +178,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"}));
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"}));
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.

View File

@ -58,7 +58,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
return;
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
annotateWithLikelihoods(alleleLikelihoodMap, ref.getBase(), vc, gb);
annotateWithLikelihoods(alleleLikelihoodMap, vc, gb);
else if ( vc.isSNP() && stratifiedContext != null)
annotateWithPileup(stratifiedContext, vc, gb);
}
@ -84,7 +84,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts);
}
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) {
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
for ( final Allele allele : vc.getAlleles() ) {

View File

@ -28,6 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -56,7 +57,7 @@ import java.util.*;
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
* Note that the Haplotype Score is only calculated for sites with read coverage.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
@ -68,10 +69,19 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc);
else if (stratifiedPerReadAlleleLikelihoodMap != null)
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
else
return null;
}
if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed())
private Map<String, Object> annotatePileup(final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc) {
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
return null;
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
@ -92,16 +102,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
if (thisContext != null) {
final ReadBackedPileup thisPileup = thisContext.getBasePileup();
if (vc.isSNP())
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
else if (vc.isIndel() || vc.isMixed()) {
if (stratifiedPerReadAlleleLikelihoodMap == null)
return null;
Double d = scoreIndelsAgainstHaplotypes(stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()));
if (d == null)
return null;
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
}
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
}
}
}
@ -112,6 +113,31 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map;
}
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc) {
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
for (final Genotype genotype : vc.getGenotypes()) {
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
if (perReadAlleleLikelihoodMap == null)
continue;
Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap);
if (d == null)
continue;
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
}
// if (scoreRA.observationCount() == 0)
// return null;
// annotate the score in the info field
final Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
return map;
}
private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
public int compare(Haplotype a, Haplotype b) {

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -12,6 +13,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
@ -23,7 +26,7 @@ import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -31,6 +34,17 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if (vc.isSNP() && stratifiedContexts != null)
return annotatePileup(ref, stratifiedContexts, vc);
else if (stratifiedPerReadAlleleLikelihoodMap != null)
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
else
return null;
}
private Map<String, Object> annotatePileup(final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
@ -48,6 +62,25 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
return map;
}
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc) {
if (stratifiedPerReadAlleleLikelihoodMap == null)
return null;
int mq0 = 0;
for ( PerReadAlleleLikelihoodMap likelihoodMap : stratifiedPerReadAlleleLikelihoodMap.values() ) {
for (GATKSAMRecord read : likelihoodMap.getLikelihoodReadMap().keySet()) {
if (read.getMappingQuality() == 0 )
mq0++;
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", mq0));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() {

View File

@ -291,8 +291,12 @@ public class VariantAnnotatorEngine {
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
AlignmentContext context = null;
PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = null;
if (stratifiedContexts != null)
context = stratifiedContexts.get(genotype.getSampleName());
if (stratifiedPerReadAlleleLikelihoodMap != null)
perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
if ( context == null && perReadAlleleLikelihoodMap == null) {
// no likelihoods nor pileup available: just move on to next sample

View File

@ -11,7 +11,7 @@ import java.util.Map;
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
public interface ActiveRegionBasedAnnotation extends AnnotationType {
// return annotations for the given contexts split by sample and then read likelihoof
// return annotations for the given contexts split by sample and then read likelihood
public abstract Map<String, Object> annotate(final Map<String,PerReadAlleleLikelihoodMap> stratifiedContexts, final VariantContext vc);
// return the descriptions used for the VCF INFO meta field