Enable and adapt HaplotypeScore and MappingQualityZero as active region annotations now that we have per-read likelihoods passed in to annotations
This commit is contained in:
parent
d0644b3565
commit
6a8cf1c84a
|
|
@ -178,7 +178,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
* so annotations will be excluded even if they are explicitly included with the other options.
|
||||
*/
|
||||
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
|
||||
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"}));
|
||||
protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"}));
|
||||
|
||||
/**
|
||||
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
return;
|
||||
|
||||
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
|
||||
annotateWithLikelihoods(alleleLikelihoodMap, ref.getBase(), vc, gb);
|
||||
annotateWithLikelihoods(alleleLikelihoodMap, vc, gb);
|
||||
else if ( vc.isSNP() && stratifiedContext != null)
|
||||
annotateWithPileup(stratifiedContext, vc, gb);
|
||||
}
|
||||
|
|
@ -84,7 +84,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -56,7 +57,7 @@ import java.util.*;
|
|||
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
|
||||
* Note that the Haplotype Score is only calculated for sites with read coverage.
|
||||
*/
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||
|
|
@ -68,10 +69,19 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||
if (vc.isSNP() && stratifiedContexts != null)
|
||||
return annotatePileup(ref, stratifiedContexts, vc);
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null)
|
||||
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed())
|
||||
private Map<String, Object> annotatePileup(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
|
||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
||||
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
|
||||
|
|
@ -92,16 +102,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (thisContext != null) {
|
||||
final ReadBackedPileup thisPileup = thisContext.getBasePileup();
|
||||
if (vc.isSNP())
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
else if (vc.isIndel() || vc.isMixed()) {
|
||||
if (stratifiedPerReadAlleleLikelihoodMap == null)
|
||||
return null;
|
||||
Double d = scoreIndelsAgainstHaplotypes(stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()));
|
||||
if (d == null)
|
||||
return null;
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -112,6 +113,31 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
|
||||
final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoodMap == null)
|
||||
continue;
|
||||
|
||||
Double d = scoreIndelsAgainstHaplotypes(perReadAlleleLikelihoodMap);
|
||||
if (d == null)
|
||||
continue;
|
||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
}
|
||||
|
||||
// if (scoreRA.observationCount() == 0)
|
||||
// return null;
|
||||
|
||||
// annotate the score in the info field
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
|
||||
|
||||
public int compare(Haplotype a, Haplotype b) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -12,6 +13,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -23,7 +26,7 @@ import java.util.Map;
|
|||
/**
|
||||
* Total count across all samples of mapping quality zero reads
|
||||
*/
|
||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -31,6 +34,17 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
|
|||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if (vc.isSNP() && stratifiedContexts != null)
|
||||
return annotatePileup(ref, stratifiedContexts, vc);
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null)
|
||||
return annotateWithLikelihoods(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotatePileup(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -48,6 +62,25 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
|
|||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> annotateWithLikelihoods(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
if (stratifiedPerReadAlleleLikelihoodMap == null)
|
||||
return null;
|
||||
|
||||
int mq0 = 0;
|
||||
for ( PerReadAlleleLikelihoodMap likelihoodMap : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
for (GATKSAMRecord read : likelihoodMap.getLikelihoodReadMap().keySet()) {
|
||||
|
||||
if (read.getMappingQuality() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", mq0));
|
||||
return map;
|
||||
}
|
||||
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
|
|
|
|||
|
|
@ -291,8 +291,12 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
AlignmentContext context = null;
|
||||
PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = null;
|
||||
if (stratifiedContexts != null)
|
||||
context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (stratifiedPerReadAlleleLikelihoodMap != null)
|
||||
perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
|
||||
if ( context == null && perReadAlleleLikelihoodMap == null) {
|
||||
// no likelihoods nor pileup available: just move on to next sample
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import java.util.Map;
|
|||
|
||||
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
|
||||
public interface ActiveRegionBasedAnnotation extends AnnotationType {
|
||||
// return annotations for the given contexts split by sample and then read likelihoof
|
||||
// return annotations for the given contexts split by sample and then read likelihood
|
||||
public abstract Map<String, Object> annotate(final Map<String,PerReadAlleleLikelihoodMap> stratifiedContexts, final VariantContext vc);
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
|
|
|
|||
Loading…
Reference in New Issue