Better interface for the Annotator in how it interacts with VariantContext.

Also, added a proof of concept genotype-level annotation (not working yet, almost there).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3035 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-18 20:41:57 +00:00
parent 96662d8d1b
commit b8e8852b4f
25 changed files with 236 additions and 99 deletions

View File

@ -9,11 +9,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class Alignability implements InfoFieldAnnotation {
public String annotate(RefMetaDataTracker tracker,
public Map<String, Object> annotate(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, StratifiedAlignmentContext> stratifiedContexts,
VariantContext vc)
@ -29,7 +30,10 @@ public class Alignability implements InfoFieldAnnotation {
}
value = Integer.parseInt(record.get("alignability"));
}
return String.format("%d", value);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%d", value));
return map;
}
public String getKeyName() { return "Alignability"; }

View File

@ -9,11 +9,12 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
return null;
@ -55,7 +56,9 @@ public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
if ( MathUtils.compareDoubles(totalWeights, 0.0) == 0 )
return null;
return String.format("%.2f", (ratio / totalWeights));
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.2f", (ratio / totalWeights)));
return map;
}
public String getKeyName() { return "AB"; }

View File

@ -9,15 +9,18 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int depth = 0;
for ( String sample : stratifiedContexts.keySet() )
depth += stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size();
return String.format("%d", depth);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%d", depth));
return map;
}
public String getKeyName() { return VCFRecord.DEPTH_KEY; }

View File

@ -0,0 +1,53 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.*;
public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
// for now, we don't support indels
if ( g == null || !g.isCalled() || vc.getType() != VariantContext.Type.SNP )
return null;
Set<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 )
return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : altAlleles )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
for (PileupElement p : pileup ) {
if ( alleleCounts.containsKey(p.getBase()) )
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
StringBuffer sb = new StringBuffer();
// we need to add counts in the correct order
for ( Allele allele : vc.getAlleles() ) {
if ( alleleCounts.containsKey(allele.getBases()[0]) ) {
if ( sb.length() > 0 )
sb.append(",");
sb.append(String.format("%d", alleleCounts.get(allele.getBases()[0])));
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), sb.toString());
return map;
}
public String getKeyName() { return "AD"; }
public VCFFormatHeaderLine getDescription() { return new VCFFormatHeaderLine(getKeyName(), 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Depth in genotypes for each ALT allele, in the same order as listed"); }
}

View File

@ -9,13 +9,16 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
double content = computeGCContent(ref);
return String.format("%.2f", content);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.2f", content));
return map;
}
public String getKeyName() { return "GC"; }

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Map;
import java.util.HashMap;
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
@ -18,7 +19,7 @@ public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotat
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
@ -53,7 +54,9 @@ public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotat
double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount);
//System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue);
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)));
return map;
}
public String getKeyName() { return "HW"; }

View File

@ -10,17 +10,20 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
return null;
int run = computeHomopolymerRun(vc.getAlternateAllele(0).toString().charAt(0), ref);
return String.format("%d", run);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%d", run));
return map;
}
public String getKeyName() { return "HRun"; }

View File

@ -10,11 +10,12 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class LowMQ implements InfoFieldAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
double mq0 = 0;
double mq10 = 0;
double total = 0;
@ -28,7 +29,9 @@ public class LowMQ implements InfoFieldAnnotation {
total += 1;
}
}
return String.format("%.04f,%.04f,%.00f", mq0/total, mq10/total, total);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.04f,%.04f,%.00f", mq0/total, mq10/total, total));
return map;
}
public String getKeyName() { return "LowMQ"; }

View File

@ -11,11 +11,12 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int mq0 = 0;
for ( String sample : stratifiedContexts.keySet() ) {
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
@ -24,7 +25,9 @@ public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotati
mq0++;
}
}
return String.format("%d", mq0);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%d", mq0));
return map;
}
public String getKeyName() { return "MQ0"; }

View File

@ -10,11 +10,12 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashMap;
public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
@ -25,7 +26,9 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
return null;
double QbyD = 10.0 * vc.getNegLog10PError() / (double)qDepth;
return String.format("%.2f", QbyD);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.2f", QbyD));
return map;
}
public String getKeyName() { return "QD"; }

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class QualityAdjustedSecondBaseLod implements InfoFieldAnnotation, ExperimentalAnnotation {
private final String KEY_NAME = "Qual_Adjusted_2blod";
@ -20,12 +21,14 @@ public class QualityAdjustedSecondBaseLod implements InfoFieldAnnotation, Experi
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Adjusted residual quality based on second-base skew"); }
public String annotate( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> contexts, VariantContext vc) {
String chi = skewCalc.annotate(tracker, ref,contexts,vc);
if ( chi == null )
return null;
public Map<String, Object> annotate( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> contexts, VariantContext vc) {
String chi = skewCalc.getAnnotation(ref, contexts, vc);
if ( chi == null )
return null;
double chi_square = Double.valueOf(chi);
double chi_loglik = chi_square <= 0.0 ? 0.0 : Math.max(-(chi_square/2.0)*log10e + log10half,CHI_LOD_MAX); // cap it...
return String.format("%f", 10*(vc.getNegLog10PError() + chi_loglik));
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%f", 10*(vc.getNegLog10PError() + chi_loglik)));
return map;
}
}

View File

@ -14,11 +14,12 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashMap;
public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
ArrayList<Integer> qualities = new ArrayList<Integer>();
for ( String sample : stratifiedContexts.keySet() ) {
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
@ -30,7 +31,9 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio
for ( Integer i : qualities )
quals[index++] = i;
double rms = MathUtils.rms(quals);
return String.format("%.2f", rms);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.2f", rms));
return map;
}
public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; }

View File

@ -11,13 +11,14 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
public abstract class RankSumTest implements InfoFieldAnnotation, WorkInProgressAnnotation {
private final static boolean DEBUG = false;
private static final double minPValue = 1e-10;
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
return null;
@ -63,7 +64,9 @@ public abstract class RankSumTest implements InfoFieldAnnotation, WorkInProgress
if ( pvalue < minPValue )
pvalue = minPValue;
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)));
return map;
}
protected abstract void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List<Integer> refQuals, List<Integer> altQuals);

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import java.util.Map;
import java.util.HashMap;
public class SecondBaseSkew implements InfoFieldAnnotation, ExperimentalAnnotation {
@ -24,7 +25,16 @@ public class SecondBaseSkew implements InfoFieldAnnotation, ExperimentalAnnotati
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(KEY_NAME, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Chi-square Secondary Base Skew"); }
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
String annotation = getAnnotation(ref, stratifiedContexts, vc);
if ( annotation == null )
return null;
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), annotation);
return map;
}
public String getAnnotation(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() )
return null;

View File

@ -10,11 +10,12 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int deletions = 0;
int depth = 0;
for ( String sample : stratifiedContexts.keySet() ) {
@ -22,7 +23,9 @@ public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotatio
deletions += pileup.getNumberOfDeletions();
depth += pileup.size();
}
return String.format("%.2f", depth == 0 ? 0.0 : (double)deletions/(double)depth);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%.2f", depth == 0 ? 0.0 : (double)deletions/(double)depth));
return map;
}
public String getKeyName() { return "Dels"; }

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.*;
@ -135,21 +134,19 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
if ( vc == null )
return 0;
MutableVariantContext mvc = new MutableVariantContext(vc);
// if the reference base is not ambiguous, we can annotate
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( stratifiedContexts != null ) {
engine.annotateContext(tracker, ref, stratifiedContexts, mvc);
vc = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
}
}
if ( variant instanceof RodVCF ) {
RodVCF vcf = (RodVCF)variant;
vcfWriter.addRecord(VariantContextAdaptors.toVCF(mvc, ref.getBase(), Arrays.asList(vcf.getRecord().getGenotypeFormatString().split(VCFRecord.GENOTYPE_FIELD_SEPERATOR)), vcf.getFilterString() != null));
vcfWriter.addRecord(VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(vcf.getRecord().getGenotypeFormatString().split(VCFRecord.GENOTYPE_FIELD_SEPERATOR)), vcf.getFilterString() != null));
} else {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(mvc, ref.getBase()));
vcfWriter.addRecord(VariantContextAdaptors.toVCF(vc, ref.getBase()));
}
return 1;

View File

@ -1,7 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
@ -72,9 +73,10 @@ public class VariantAnnotatorEngine {
requestedGenotypeAnnotations = new ArrayList<GenotypeAnnotation>();
for ( Class c : classes ) {
// note that technically an annotation can work on both the INFO and FORMAT fields
if ( InfoFieldAnnotation.class.isAssignableFrom(c) )
requestedInfoAnnotations.add((InfoFieldAnnotation)getInstance(c));
else if ( GenotypeAnnotation.class.isAssignableFrom(c) )
if ( GenotypeAnnotation.class.isAssignableFrom(c) )
requestedGenotypeAnnotations.add((GenotypeAnnotation)getInstance(c));
}
@ -134,36 +136,58 @@ public class VariantAnnotatorEngine {
return descriptions;
}
public void annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, MutableVariantContext vc) {
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new HashMap<String, Object>(vc.getAttributes());
// annotate dbsnp occurrence
if ( annotateDbsnp ) {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
vc.putAttribute(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
// annotate dbsnp id if available and not already there
if ( dbsnp != null && !vc.hasAttribute("ID") )
vc.putAttribute("ID", dbsnp.getRS_ID());
infoAnnotations.put("ID", dbsnp.getRS_ID());
}
if ( annotateHapmap2 ) {
RODRecordList hapmap2 = tracker.getTrackData("hapmap2",null);
vc.putAttribute(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1");
infoAnnotations.put(VCFRecord.HAPMAP2_KEY, hapmap2 == null? "0" : "1");
}
if ( annotateHapmap3 ) {
RODRecordList hapmap3 = tracker.getTrackData("hapmap3",null);
vc.putAttribute(VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1");
infoAnnotations.put(VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1");
}
for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) {
String annot = annotation.annotate(tracker, ref, stratifiedContexts, vc);
if ( annot != null ) {
vc.putAttribute(annotation.getKeyName(), annot);
Map<String, Object> result = annotation.annotate(tracker, ref, stratifiedContexts, vc);
if ( result != null )
infoAnnotations.putAll(result);
}
Map<String, Genotype> genotypes;
if ( requestedGenotypeAnnotations.size() == 0 ) {
genotypes = vc.getGenotypes();
} else {
genotypes = new HashMap<String, Genotype>(vc.getNSamples());
for ( Map.Entry<String, Genotype> g : vc.getGenotypes().entrySet() ) {
Genotype genotype = g.getValue();
StratifiedAlignmentContext context = stratifiedContexts.get(g.getKey());
if ( context == null ) {
genotypes.put(g.getKey(), genotype);
continue;
}
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
Map<String, Object> result = annotation.annotate(tracker, ref, context, vc, genotype);
if ( result != null )
genotypeAnnotations.putAll(result);
}
genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased()));
}
}
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.annotateContext(tracker, ref, stratifiedContexts, vc);
}
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), infoAnnotations);
}
}

View File

@ -4,14 +4,15 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
import java.util.Map;
public interface GenotypeAnnotation {
// annotate the given record for the given variation and context split by sample
public void annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc);
// return annotations for the given contexts/genotype split by sample
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, StratifiedAlignmentContext stratifiedContext, VariantContext vc, Genotype g);
// return the FORMAT key
public String getKeyName();

View File

@ -10,8 +10,8 @@ import java.util.Map;
public interface InfoFieldAnnotation {
// return the annotation for the given variation and context split by sample (return null for no annotation)
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc);
// return annotations for the given contexts split by sample
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc);
// return the INFO key
public String getKeyName();

View File

@ -417,7 +417,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
attributes.put("SB", new Double(strandScore));
}
VariantContext vc = new MutableVariantContext("UG_SNP_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
VariantContext vc = new VariantContext("UG_SNP_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD);
}

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
@ -227,7 +226,7 @@ public class UnifiedGenotyperEngine {
if ( call != null && call.vc != null ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, (MutableVariantContext)call.vc);
call.vc = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, call.vc);
}
}

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import java.util.Map;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
@ -29,21 +30,22 @@ public class ProportionOfNonrefBasesSupportingSNP implements InfoFieldAnnotation
1,VCFInfoHeaderLine.INFO_TYPE.Float,"Simple proportion of non-reference bases that are the SNP base");
}
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() ) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() )
return null;
} else {
Pair<Integer,Integer> totalNonref_totalSNP = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalNonref_totalSNP);
}
if ( totalNonref_totalSNP.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
return String.format("%f", p );
Pair<Integer,Integer> totalNonref_totalSNP = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalNonref_totalSNP);
}
if ( totalNonref_totalSNP.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%f", p ));
return map;
}
private Pair<Integer,Integer> getNonrefAndSNP(ReadBackedPileup p, char ref, char snp, Pair<Integer,Integer> totals) {

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import java.util.Map;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
@ -26,21 +27,23 @@ public class ProportionOfRefSecondBasesSupportingSNP implements InfoFieldAnnotat
public String getKeyName() { return KEY_NAME; }
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() ) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() )
return null;
} else {
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
return String.format("%f", p );
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%f", p ));
return map;
}
private double getProportionOfRefSecondaryBasesSupportingSNP(Pair<Integer,Integer> tRef_snpSupport) {

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.Map;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
@ -29,21 +30,23 @@ public class ProportionOfSNPSecondBasesSupportingRef implements InfoFieldAnnotat
public boolean useZeroQualityReads() { return USE_MAPQ0_READS; }
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() ) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( ! vc.isSNP() || ! vc.isBiallelic() )
return null;
} else {
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
return String.format("%f", p );
Pair<Integer,Integer> totalAndSNPSupporting = new Pair<Integer,Integer>(0,0);
for ( String sample : context.keySet() ) {
ReadBackedPileup pileup = context.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), vc.getAlternateAllele(0).toString().charAt(0), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), String.format("%f", p ));
return map;
}
public double getProportionOfSNPSecondaryBasesSupportingRef(Pair<Integer,Integer> tSNP_refSupport) {

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;
/**
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
@ -28,25 +29,29 @@ public class ThousandGenomesAnnotator implements InfoFieldAnnotation {
1,VCFInfoHeaderLine.INFO_TYPE.String,"Is this site seen in Pilot1 or Pilot2 of 1KG?");
}
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( tracker == null ) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> context, VariantContext vc) {
if ( tracker == null )
return null;
}
RODRecordList pilot1 = tracker.getTrackData("pilot1",null);
RODRecordList pilot2 = tracker.getTrackData("pilot2",null);
String result;
if ( pilot1 == null && pilot2 == null) {
return "0";
result = "0";
} else {
if ( pilot1 != null && ! ( (HapmapVCFROD) pilot1.get(0)).getRecord().isFiltered() ) {
return "1";
result = "1";
} else if ( pilot2 != null && ! ( (HapmapVCFROD) pilot2.get(0)).getRecord().isFiltered() ) {
return "1";
result = "1";
} else {
return "0";
result = "0";
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyName(), result);
return map;
}
}