A framework to get annotations working in the HaplotypeCaller (and ART walkers in general).

Adding support for active-region-based annotation for most standard annotations.  I need to discuss with Ryan what to do about tests that require offsets into the reads (since I don't have access to the offsets) like e.g. the ReadPosRankSumTest.

IMPORTANT NOTE: this is still very much a dev effort and can only be accessed through private walkers (i.e. the HaplotypeCaller).  The interface is in flux and so we are making no attempt at all to make it clean or to merge this with the Locus-Traversal-based annotation system.  When we are satisfied that it's working properly and have settled on the proper interface, we will clean it up then.
This commit is contained in:
Eric Banks 2012-04-11 14:43:40 -04:00
parent 5b7da3831f
commit 5bf9dd2def
12 changed files with 309 additions and 33 deletions

View File

@ -5,12 +5,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
@ -31,8 +29,31 @@ public class BaseQualityRankSumTest extends RankSumTest {
altQuals.add((double)p.getQual());
}
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
/*
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( isUsableBase(p) ) {
if ( matchesRef )
refQuals.add((double)p.getQual());
else
altQuals.add((double)p.getQual());
}
}
}
*/
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -49,7 +52,7 @@ import java.util.Map;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
@ -63,6 +66,13 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
}
public List<String> getKeyNames() {
return Arrays.asList(keyNames);
}

View File

@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
@ -33,7 +36,7 @@ import java.util.Map;
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -47,6 +50,22 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
int depth = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final List<GATKSAMRecord> alleleBin : alleleBins.values() ) {
depth += alleleBin.size();
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", depth));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }

View File

@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -49,7 +51,7 @@ import java.util.*;
* indicative of false positive calls. Note that the fisher strand test may not be
* calculated for certain complex indel cases or for multi-allelic sites.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
@ -78,6 +80,22 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( !vc.isVariant() )
return null;
int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
if ( pvalue == null )
return null;
Map<String, Object> map = new HashMap<String, Object>();
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
return map;
}
public List<String> getKeyNames() {
return Arrays.asList(FS);
}
@ -193,6 +211,38 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return sum;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
private static int[][] getContingencyTable(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, Allele ref, Allele alt) {
int[][] table = new int[2][2];
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alt.equals(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
boolean isFW = read.getReadNegativeStrandFlag();
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
table[row][column]++;
}
}
}
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
@ -214,8 +264,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
Allele base = Allele.create(p.getBase(), false);
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
boolean matchesRef = ref.equals(base, true);
boolean matchesAlt = alt.equals(base, true);
final boolean matchesRef = ref.equals(base, true);
final boolean matchesAlt = alt.equals(base, true);
if ( matchesRef || matchesAlt ) {
int row = matchesRef ? 0 : 1;
int column = isFW ? 0 : 1;
@ -227,6 +277,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc

View File

@ -3,12 +3,15 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -27,12 +30,19 @@ import java.util.Map;
* more information. Note that the Inbreeding Coefficient will not be calculated for files
* with fewer than a minimum (generally 10) number of samples.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
private static final int MIN_SAMPLES = 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
return calculateIC(vc);
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
return calculateIC(vc);
}
private Map<String, Object> calculateIC(final VariantContext vc) {
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
return null;

View File

@ -6,12 +6,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
@ -35,6 +33,23 @@ public class MappingQualityRankSumTest extends RankSumTest {
}
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( matchesRef )
refQuals.add((double)read.getMappingQuality());
else
altQuals.add((double)read.getMappingQuality());
}
}
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();

View File

@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -23,7 +26,7 @@ import java.util.Map;
* Low scores are indicative of false positive calls and artifacts. Note that QualByDepth requires sequencing
* reads associated with the samples with polymorphic genotypes.
*/
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation {
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -62,4 +65,40 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
int depth = 0;
for ( final Genotype genotype : genotypes ) {
// we care only about variant calls with likelihoods
if ( !genotype.isHet() && !genotype.isHomVar() )
continue;
final Map<Allele, List<GATKSAMRecord>> alleleBins = stratifiedContexts.get(genotype.getSampleName());
if ( alleleBins == null )
continue;
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
if ( !alleleBin.getKey().equals(Allele.NO_CALL) )
depth += alleleBin.getValue().size();
}
}
if ( depth == 0 )
return null;
double QD = -10.0 * vc.getLog10PError() / (double)depth;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -13,6 +14,8 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
@ -24,7 +27,7 @@ import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
@ -34,7 +37,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
for ( AlignmentContext context : stratifiedContexts.values() )
totalSize += context.size();
int[] qualities = new int[totalSize];
final int[] qualities = new int[totalSize];
int index = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
@ -54,6 +57,35 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
return map;
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
int depth = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
depth += alleleBin.getValue().size();
}
}
final int[] qualities = new int[depth];
int index = 0;
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
for ( final List<GATKSAMRecord> reads : alleleBins.values() ) {
for ( final GATKSAMRecord read : reads ) {
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
qualities[index++] = read.getMappingQuality();
}
}
}
double rms = MathUtils.rms(qualities);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", rms));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
@ -12,6 +13,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
@ -26,7 +28,7 @@ import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;
@ -38,7 +40,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if (genotypes == null || genotypes.size() == 0)
return null;
final ArrayList<Double> refQuals = new ArrayList<Double>();
final ArrayList<Double> altQuals = new ArrayList<Double>();
@ -104,12 +105,52 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
if (!Double.isNaN(testResults.first))
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
return map;
}
protected abstract void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if (stratifiedContexts.size() == 0)
return null;
protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals);
final GenotypesContext genotypes = vc.getGenotypes();
if (genotypes == null || genotypes.size() == 0)
return null;
final ArrayList<Double> refQuals = new ArrayList<Double>();
final ArrayList<Double> altQuals = new ArrayList<Double>();
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
final Map<Allele, List<GATKSAMRecord>> context = stratifiedContexts.get(genotype.getSampleName());
if ( context == null )
continue;
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), context, refQuals, altQuals);
}
if ( refQuals.size() == 0 || altQuals.size() == 0 )
return null;
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
for (final Double qual : altQuals) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
}
for (final Double qual : refQuals) {
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
}
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
final Map<String, Object> map = new HashMap<String, Object>();
if (!Double.isNaN(testResults.first))
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
return map;
}
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
protected static boolean isUsableBase(final PileupElement p) {
return !(p.isInsertionAtBeginningOfRead() ||

View File

@ -11,12 +11,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
@ -49,6 +47,27 @@ public class ReadPosRankSumTest extends RankSumTest {
}
}
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals) {
// TODO -- implement me; how do we pull out the correct offset from the read?
return;
/*
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
final boolean matchesRef = ref.equals(alleleBin.getKey());
final boolean matchesAlt = alts.contains(alleleBin.getKey());
if ( !matchesRef && !matchesAlt )
continue;
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
if ( matchesRef )
refQuals.add((double)read.getMappingQuality());
else
altQuals.add((double)read.getMappingQuality());
}
}
*/
}
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.

View File

@ -33,10 +33,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@ -94,6 +92,13 @@ public class VariantAnnotatorEngine {
initializeDBs();
}
// experimental constructor for active region traversal
public VariantAnnotatorEngine(GenomeAnalysisEngine toolkit) {
this.walker = null;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(Arrays.asList("ActiveRegionBasedAnnotation"), Collections.<String>emptyList());
}
// select specific expressions to use
public void initializeExpressions(List<String> expressionsToUse) {
// set up the expressions
@ -169,7 +174,7 @@ public class VariantAnnotatorEngine {
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
}
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
// annotate db occurrences
@ -192,6 +197,20 @@ public class VariantAnnotatorEngine {
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
}
public VariantContext annotateContext(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
// go through all the requested info annotationTypes
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
if ( annotationsFromCurrentType != null )
infoAnnotations.putAll(annotationsFromCurrentType);
}
// generate a new annotated VC
return new VariantContextBuilder(vc).attributes(infoAnnotations).make();
}
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {

View File

@ -9,7 +9,7 @@ import java.util.List;
import java.util.Map;
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
public interface ActiveRegionBasedAnnotation {
public interface ActiveRegionBasedAnnotation extends AnnotationType {
// return annotations for the given contexts split by sample and then allele
public abstract Map<String, Object> annotate(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, final VariantContext vc);