A framework to get annotations working in the HaplotypeCaller (and ART walkers in general).
Adding support for active-region-based annotation for most standard annotations. I need to discuss with Ryan what to do about tests that require offsets into the reads (since I don't have access to the offsets) like e.g. the ReadPosRankSumTest. IMPORTANT NOTE: this is still very much a dev effort and can only be accessed through private walkers (i.e. the HaplotypeCaller). The interface is in flux and so we are making no attempt at all to make it clean or to merge this with the Locus-Traversal-based annotation system. When we are satisfied that it's working properly and have settled on the proper interface, we will clean it up then.
This commit is contained in:
parent
5b7da3831f
commit
5bf9dd2def
|
|
@ -5,12 +5,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -31,8 +29,31 @@ public class BaseQualityRankSumTest extends RankSumTest {
|
|||
altQuals.add((double)p.getQual());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
|
||||
// TODO -- implement me; how do we pull out the correct offset from the read?
|
||||
return;
|
||||
|
||||
/*
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
|
||||
if ( isUsableBase(p) ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)p.getQual());
|
||||
else
|
||||
altQuals.add((double)p.getQual());
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
@ -49,7 +52,7 @@ import java.util.Map;
|
|||
* allele Frequency, for each ALT allele, in the same order as listed; total number
|
||||
* of alleles in called genotypes.
|
||||
*/
|
||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
|
|
@ -63,6 +66,13 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
|||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -33,7 +36,7 @@ import java.util.Map;
|
|||
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
|
||||
* -dcov D is N * D
|
||||
*/
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
@ -47,6 +50,22 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final List<GATKSAMRecord> alleleBin : alleleBins.values() ) {
|
||||
depth += alleleBin.size();
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -49,7 +51,7 @@ import java.util.*;
|
|||
* indicative of false positive calls. Note that the fisher strand test may not be
|
||||
* calculated for certain complex indel cases or for multi-allelic sites.
|
||||
*/
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
|
||||
|
|
@ -78,6 +80,22 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||
|
||||
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
||||
if ( pvalue == null )
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(FS);
|
||||
}
|
||||
|
|
@ -193,6 +211,38 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
* allele1 # #
|
||||
* allele2 # #
|
||||
* @return a 2x2 contingency table
|
||||
*/
|
||||
private static int[][] getContingencyTable(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, Allele ref, Allele alt) {
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alt.equals(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
boolean isFW = read.getReadNegativeStrandFlag();
|
||||
|
||||
int row = matchesRef ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
|
||||
table[row][column]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
|
|
@ -214,8 +264,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
Allele base = Allele.create(p.getBase(), false);
|
||||
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
|
||||
|
||||
boolean matchesRef = ref.equals(base, true);
|
||||
boolean matchesAlt = alt.equals(base, true);
|
||||
final boolean matchesRef = ref.equals(base, true);
|
||||
final boolean matchesAlt = alt.equals(base, true);
|
||||
if ( matchesRef || matchesAlt ) {
|
||||
int row = matchesRef ? 0 : 1;
|
||||
int column = isFW ? 0 : 1;
|
||||
|
|
@ -227,6 +277,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||
* fw rc
|
||||
|
|
|
|||
|
|
@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -27,12 +30,19 @@ import java.util.Map;
|
|||
* more information. Note that the Inbreeding Coefficient will not be calculated for files
|
||||
* with fewer than a minimum (generally 10) number of samples.
|
||||
*/
|
||||
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
return calculateIC(vc);
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
return calculateIC(vc);
|
||||
}
|
||||
|
||||
private Map<String, Object> calculateIC(final VariantContext vc) {
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -6,12 +6,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -35,6 +33,23 @@ public class MappingQualityRankSumTest extends RankSumTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)read.getMappingQuality());
|
||||
else
|
||||
altQuals.add((double)read.getMappingQuality());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
|
|
|
|||
|
|
@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -23,7 +26,7 @@ import java.util.Map;
|
|||
* Low scores are indicative of false positive calls and artifacts. Note that QualByDepth requires sequencing
|
||||
* reads associated with the samples with polymorphic genotypes.
|
||||
*/
|
||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
@ -62,4 +65,40 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
|
||||
// we care only about variant calls with likelihoods
|
||||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||
continue;
|
||||
|
||||
final Map<Allele, List<GATKSAMRecord>> alleleBins = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( alleleBins == null )
|
||||
continue;
|
||||
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
if ( !alleleBin.getKey().equals(Allele.NO_CALL) )
|
||||
depth += alleleBin.getValue().size();
|
||||
}
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
double QD = -10.0 * vc.getLog10PError() / (double)depth;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -13,6 +14,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -24,7 +27,7 @@ import java.util.Map;
|
|||
/**
|
||||
* Root Mean Square of the mapping quality of the reads across all samples.
|
||||
*/
|
||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
@ -34,7 +37,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
|||
for ( AlignmentContext context : stratifiedContexts.values() )
|
||||
totalSize += context.size();
|
||||
|
||||
int[] qualities = new int[totalSize];
|
||||
final int[] qualities = new int[totalSize];
|
||||
int index = 0;
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
|
|
@ -54,6 +57,35 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
|||
return map;
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
||||
depth += alleleBin.getValue().size();
|
||||
}
|
||||
}
|
||||
|
||||
final int[] qualities = new int[depth];
|
||||
int index = 0;
|
||||
|
||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
||||
for ( final List<GATKSAMRecord> reads : alleleBins.values() ) {
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = read.getMappingQuality();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double rms = MathUtils.rms(qualities);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", rms));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); }
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
@ -12,6 +13,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
|
|
@ -26,7 +28,7 @@ import java.util.Map;
|
|||
/**
|
||||
* Abstract root for all RankSum based annotations
|
||||
*/
|
||||
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
|
|
@ -38,7 +40,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
if (genotypes == null || genotypes.size() == 0)
|
||||
return null;
|
||||
|
||||
|
||||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||
|
||||
|
|
@ -104,12 +105,52 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
if (!Double.isNaN(testResults.first))
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
protected abstract void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0)
|
||||
return null;
|
||||
|
||||
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if (genotypes == null || genotypes.size() == 0)
|
||||
return null;
|
||||
|
||||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||
|
||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||
final Map<Allele, List<GATKSAMRecord>> context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), context, refQuals, altQuals);
|
||||
}
|
||||
|
||||
if ( refQuals.size() == 0 || altQuals.size() == 0 )
|
||||
return null;
|
||||
|
||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
||||
for (final Double qual : altQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
||||
}
|
||||
for (final Double qual : refQuals) {
|
||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
|
||||
}
|
||||
|
||||
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
|
||||
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
if (!Double.isNaN(testResults.first))
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
return map;
|
||||
}
|
||||
|
||||
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
|
||||
|
||||
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
||||
|
||||
protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
||||
|
||||
protected static boolean isUsableBase(final PileupElement p) {
|
||||
return !(p.isInsertionAtBeginningOfRead() ||
|
||||
|
|
|
|||
|
|
@ -11,12 +11,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
|
||||
|
|
@ -49,6 +47,27 @@ public class ReadPosRankSumTest extends RankSumTest {
|
|||
}
|
||||
}
|
||||
|
||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
|
||||
// TODO -- implement me; how do we pull out the correct offset from the read?
|
||||
return;
|
||||
|
||||
/*
|
||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
||||
if ( !matchesRef && !matchesAlt )
|
||||
continue;
|
||||
|
||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
||||
if ( matchesRef )
|
||||
refQuals.add((double)read.getMappingQuality());
|
||||
else
|
||||
altQuals.add((double)read.getMappingQuality());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
|
||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
||||
|
|
|
|||
|
|
@ -33,10 +33,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -94,6 +92,13 @@ public class VariantAnnotatorEngine {
|
|||
initializeDBs();
|
||||
}
|
||||
|
||||
// experimental constructor for active region traversal
|
||||
public VariantAnnotatorEngine(GenomeAnalysisEngine toolkit) {
|
||||
this.walker = null;
|
||||
this.toolkit = toolkit;
|
||||
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(Arrays.asList("ActiveRegionBasedAnnotation"), Collections.<String>emptyList());
|
||||
}
|
||||
|
||||
// select specific expressions to use
|
||||
public void initializeExpressions(List<String> expressionsToUse) {
|
||||
// set up the expressions
|
||||
|
|
@ -169,7 +174,7 @@ public class VariantAnnotatorEngine {
|
|||
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
// annotate db occurrences
|
||||
|
|
@ -192,6 +197,20 @@ public class VariantAnnotatorEngine {
|
|||
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
|
||||
if ( annotationsFromCurrentType != null )
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
|
||||
// generate a new annotated VC
|
||||
return new VariantContextBuilder(vc).attributes(infoAnnotations).make();
|
||||
}
|
||||
|
||||
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
|
||||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
|
||||
public interface ActiveRegionBasedAnnotation {
|
||||
public interface ActiveRegionBasedAnnotation extends AnnotationType {
|
||||
// return annotations for the given contexts split by sample and then allele
|
||||
public abstract Map<String, Object> annotate(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, final VariantContext vc);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue