Move VariantAnnotator over to use a StratifiedAlignmentContext split by sample.
The only major difference is that we are now able to get accurate allele balance ratios. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2321 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8f7554d44f
commit
2de7e1a178
|
|
@ -1,17 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class AlleleBalance extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
|
||||
if ( !(variation instanceof VariantBackedByGenotype) )
|
||||
return null;
|
||||
|
|
@ -19,121 +19,47 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
double ratio;
|
||||
Genotype g = genotypes.get(0);
|
||||
if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) {
|
||||
Pair<Double, Integer> weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup);
|
||||
if ( weightedBalance.second == 0 )
|
||||
return null;
|
||||
ratio = weightedBalance.first;
|
||||
} else {
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
int refCount = 0;
|
||||
int altCount = 0;
|
||||
for ( Genotype genotype : genotypes ) {
|
||||
// we care only about het calls
|
||||
if ( !genotype.isHet() )
|
||||
continue;
|
||||
|
||||
if ( !(genotype instanceof SampleBacked) )
|
||||
continue;
|
||||
|
||||
String sample = ((SampleBacked)genotype).getSampleName();
|
||||
StratifiedAlignmentContext context = stratifiedContexts.get(sample);
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final String genotypeStr = genotype.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String bases = new String(pileup.getBases()).toUpperCase();
|
||||
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup().getBases()).toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
int aCount = Utils.countOccurrences(a, bases);
|
||||
int bCount = Utils.countOccurrences(b, bases);
|
||||
|
||||
refCount += a == ref.getBase() ? aCount : bCount;
|
||||
altCount += a == ref.getBase() ? bCount : aCount;
|
||||
}
|
||||
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
return null;
|
||||
|
||||
double ratio = (double)refCount / (double)(refCount + altCount);
|
||||
return String.format("%.2f", ratio);
|
||||
}
|
||||
|
||||
public String getKeyName() { return "AB"; }
|
||||
|
||||
public String getDescription() { return "AB,1,Float,\"Allele Balance (ref/(ref+alt))\""; }
|
||||
|
||||
private double computeSingleBalance(char ref, final String genotypeStr, final String bases) {
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
int aCount = Utils.countOccurrences(a, bases);
|
||||
int bCount = Utils.countOccurrences(b, bases);
|
||||
|
||||
int refCount = a == ref ? aCount : bCount;
|
||||
int altCount = a == ref ? bCount : aCount;
|
||||
|
||||
double ratio = (double)refCount / (double)(refCount + altCount);
|
||||
return ratio;
|
||||
}
|
||||
|
||||
// returns the ratio and then number of points which comprise it
|
||||
private Pair<Double, Integer> computeWeightedBalance(char ref, List<Genotype> genotypes, ReadBackedPileup pileup) {
|
||||
|
||||
ArrayList<Double> refBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> weights = new ArrayList<Double>();
|
||||
|
||||
// accumulate ratios and weights
|
||||
for ( Genotype g : genotypes ) {
|
||||
|
||||
if ( !(g instanceof ReadBacked) || !(g instanceof PosteriorsBacked) )
|
||||
continue;
|
||||
|
||||
final String genotypeStr = g.getBases().toUpperCase();
|
||||
if ( genotypeStr.length() != 2 )
|
||||
continue;
|
||||
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.unorderedValueOf(genotypeStr);
|
||||
|
||||
// we care only about het ref calls
|
||||
if ( !bestGenotype.isHetRef(ref) )
|
||||
continue;
|
||||
|
||||
char a = genotypeStr.charAt(0);
|
||||
char b = genotypeStr.charAt(1);
|
||||
char altBase = a != ref ? a : b;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
ReadBackedPileup myPileup = ((ReadBacked)g).getPileup();
|
||||
|
||||
// if the pileup is null, we'll just have to use the full pileup (it's better than nothing)
|
||||
if ( myPileup == null )
|
||||
myPileup = pileup;
|
||||
|
||||
int[] counts = myPileup.getBaseCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
|
||||
// sanity check
|
||||
if ( refCount + altCount == 0 )
|
||||
continue;
|
||||
|
||||
double[] posteriors = ((PosteriorsBacked)g).getPosteriors();
|
||||
posteriors = MathUtils.normalizeFromLog10(posteriors);
|
||||
double weight = posteriors[bestGenotype.ordinal()];
|
||||
|
||||
// sanity check
|
||||
if ( MathUtils.compareDoubles(weight, 0.0) == 0 )
|
||||
continue;
|
||||
|
||||
weights.add(weight);
|
||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||
}
|
||||
|
||||
double ratio = 0.0;
|
||||
|
||||
if ( weights.size() > 0 ) {
|
||||
// normalize the weights
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
sum += weights.get(i);
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
weights.set(i, weights.get(i) / sum);
|
||||
|
||||
// calculate total weighted ratios
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
ratio += weights.get(i) * refBalances.get(i);
|
||||
}
|
||||
|
||||
return new Pair<Double, Integer>(ratio, weights.size());
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
public String getDescription() { return "AB,1,Float,\"Allele Balance for hets (ref/(ref+alt))\""; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,21 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class DepthOfCoverage extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
int depth = pileup.getReads().size();
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
int depth = 0;
|
||||
for ( String sample : stratifiedContexts.keySet() )
|
||||
depth += stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup().size();
|
||||
return String.format("%d", depth);
|
||||
}
|
||||
|
||||
public String getKeyName() { return VCFRecord.DEPTH_KEY; }
|
||||
|
||||
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; }
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class HomopolymerRun extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
|
||||
if ( !variation.isBiallelic() || !variation.isSNP() )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -1,27 +1,35 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
public class RMSMappingQuality extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
int[] qualities = new int[pileup.size()];
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
ArrayList<Integer> qualities = new ArrayList<Integer>();
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
for (PileupElement p : pileup )
|
||||
qualities.add(p.getRead().getMappingQuality());
|
||||
}
|
||||
int[] quals = new int[qualities.size()];
|
||||
int index = 0;
|
||||
for (PileupElement p : pileup )
|
||||
qualities[index++] = p.getRead().getMappingQuality();
|
||||
double rms = MathUtils.rms(qualities);
|
||||
for ( Integer i : qualities )
|
||||
quals[index++] = i;
|
||||
double rms = MathUtils.rms(quals);
|
||||
return String.format("%.2f", rms);
|
||||
}
|
||||
|
||||
public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; }
|
||||
|
||||
public String getDescription() { return getKeyName() + ",1,Float,\"RMS Mapping Quality\""; }
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -8,13 +9,14 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class RankSumTest implements VariantAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private static final double minPValue = 1e-10;
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
|
||||
if ( !(variation instanceof VariantBackedByGenotype) )
|
||||
return null;
|
||||
|
|
@ -22,18 +24,20 @@ public class RankSumTest implements VariantAnnotation {
|
|||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
// this test doesn't make sense for homs
|
||||
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
|
||||
if ( genotype == null )
|
||||
return null;
|
||||
|
||||
ArrayList<Integer> refQuals = new ArrayList<Integer>();
|
||||
ArrayList<Integer> altQuals = new ArrayList<Integer>();
|
||||
|
||||
if ( genotype instanceof ReadBacked && ((ReadBacked)genotype).getPileup() != null )
|
||||
fillQualsFromGenotypes(ref.getBase(), variation.getAlternativeBaseForSNP(), genotypes, refQuals, altQuals);
|
||||
else
|
||||
fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), pileup, refQuals, altQuals);
|
||||
for ( Genotype genotype : genotypes ) {
|
||||
// we care only about het calls
|
||||
if ( genotype.isHet() ) {
|
||||
String sample = ((SampleBacked)genotype).getSampleName();
|
||||
StratifiedAlignmentContext context = stratifiedContexts.get(sample);
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), refQuals, altQuals);
|
||||
}
|
||||
}
|
||||
|
||||
WilcoxonRankSum wilcoxon = new WilcoxonRankSum();
|
||||
for ( Integer qual : altQuals )
|
||||
|
|
@ -44,7 +48,7 @@ public class RankSumTest implements VariantAnnotation {
|
|||
// for R debugging
|
||||
if ( DEBUG ) {
|
||||
wilcoxon.DEBUG = DEBUG;
|
||||
System.out.printf("%s%n", pileup.getLocation());
|
||||
System.out.printf("%s%n", ref.getLocus());
|
||||
System.out.printf("alt <- c(%s)%n", Utils.join(",", altQuals));
|
||||
System.out.printf("ref <- c(%s)%n", Utils.join(",", refQuals));
|
||||
}
|
||||
|
|
@ -78,20 +82,4 @@ public class RankSumTest implements VariantAnnotation {
|
|||
altQuals.add((int)p.getQual());
|
||||
}
|
||||
}
|
||||
|
||||
private void fillQualsFromGenotypes(char ref, char alt, List<Genotype> genotypes, List<Integer> refQuals, List<Integer> altQuals) {
|
||||
// accumulate quals
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( !(g instanceof ReadBacked) )
|
||||
continue;
|
||||
|
||||
ReadBackedPileup pileup = ((ReadBacked)g).getPileup();
|
||||
if ( pileup == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(ref, alt, pileup, refQuals, altQuals);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -6,6 +6,9 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -17,35 +20,39 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
*/
|
||||
public class SecondBaseSkew implements VariantAnnotation {
|
||||
private final static double epsilon = Math.pow(10.0,-12);
|
||||
private final static boolean USE_ZERO_QUALITY_READS = false; // todo -- should be false in my opinion MAD
|
||||
private final static String KEY_NAME = "2b_Chi";
|
||||
private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3};
|
||||
private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO;
|
||||
|
||||
public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; }
|
||||
|
||||
public String getKeyName() { return KEY_NAME; }
|
||||
|
||||
public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; }
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
if ( variation.isSNP() && variation.isBiallelic() ) {
|
||||
char snp = variation.getAlternativeBaseForSNP();
|
||||
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup, snp);
|
||||
if ( depthProp == null ) {
|
||||
return null;
|
||||
} else {
|
||||
//System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond());
|
||||
double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst());
|
||||
double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst());
|
||||
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
|
||||
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
|
||||
double chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
|
||||
return String.format("%f", chi_square);
|
||||
}
|
||||
} else {
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
if ( !variation.isBiallelic() || !variation.isSNP() )
|
||||
return null;
|
||||
|
||||
char snp = variation.getAlternativeBaseForSNP();
|
||||
|
||||
Pair<Integer, Integer> depth = new Pair<Integer, Integer>(0, 0);
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
Pair<Integer, Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), snp);
|
||||
depth.first += sampleDepth.first;
|
||||
depth.second += sampleDepth.second;
|
||||
}
|
||||
|
||||
if ( depth.first == 0 )
|
||||
return null;
|
||||
|
||||
double biasedProportion = (1.0 + depth.second) / (1.0 + depth.first);
|
||||
|
||||
//System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond());
|
||||
double p_transformed = transform(biasedProportion, depth.first+1);
|
||||
double expected_transformed = transform(proportionExpectations[0], depth.first+1);
|
||||
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
|
||||
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
|
||||
double chi_square = Math.signum(biasedProportion - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
|
||||
return String.format("%f", chi_square);
|
||||
}
|
||||
|
||||
private double transform( double proportion, int depth ) {
|
||||
|
|
@ -53,7 +60,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
||||
}
|
||||
|
||||
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, char snp ) {
|
||||
private Pair<Integer, Integer> getSecondaryPileupNonrefCount(char ref, ReadBackedPileup p, char snp ) {
|
||||
int variantDepth = 0;
|
||||
int variantsWithRefSecondBase = 0;
|
||||
|
||||
|
|
@ -69,11 +76,6 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
if ( variantDepth > 0 ) {
|
||||
double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth );
|
||||
return new Pair<Integer,Double>(variantDepth+1, biasedProportion);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return new Pair<Integer, Integer>(variantDepth, variantsWithRefSecondBase);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,27 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class SpanningDeletions extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
int deletions = pileup.getNumberOfDeletions();
|
||||
return String.format("%.2f", (double)deletions/(double)pileup.size());
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
int deletions = 0;
|
||||
int depth = 0;
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
deletions += pileup.getNumberOfDeletions();
|
||||
depth += pileup.size();
|
||||
}
|
||||
return String.format("%.2f", (double)deletions/(double)depth);
|
||||
}
|
||||
|
||||
public String getKeyName() { return "Dels"; }
|
||||
|
||||
public String getDescription() { return "Dels,1,Float,\"Fraction of Reads Containing Spanning Deletions\""; }
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -1,17 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public interface VariantAnnotation {
|
||||
|
||||
// return the annotation for the given locus data (return null for no annotation)
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation);
|
||||
|
||||
// return true if you want to use a context with mapping quality zero reads, false otherwise
|
||||
public boolean useZeroQualityReads();
|
||||
// return the annotation for the given variation and context split by sample (return null for no annotation)
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation);
|
||||
|
||||
// return the INFO key
|
||||
public String getKeyName();
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.contexts.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
|
@ -165,9 +164,11 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
// if the reference base is not ambiguous, the variant is a SNP, and it's the appropriate type, we can annotate
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
|
||||
variant.isBiallelic() &&
|
||||
variant.isSNP() )
|
||||
annotations = getAnnotations(ref, context, variant, requestedAnnotations);
|
||||
|
||||
variant.isSNP() ) {
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, null, null);
|
||||
if ( stratifiedContexts != null )
|
||||
annotations = getAnnotations(ref, stratifiedContexts, variant, requestedAnnotations);
|
||||
}
|
||||
writeVCF(tracker, ref, context, variant, annotations);
|
||||
|
||||
return 1;
|
||||
|
|
@ -208,30 +209,26 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// option #1: don't specify annotations to be used: standard annotations are used by default
|
||||
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) {
|
||||
public static Map<String, String> getAnnotations(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
if ( standardAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
return getAnnotations(ref, context, variation, standardAnnotations.values());
|
||||
return getAnnotations(ref, stratifiedContexts, variation, standardAnnotations.values());
|
||||
}
|
||||
|
||||
// option #2: specify that all possible annotations be used
|
||||
public static Map<String, String> getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) {
|
||||
public static Map<String, String> getAllAnnotations(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
if ( allAnnotations == null )
|
||||
determineAllAnnotations();
|
||||
return getAnnotations(ref, context, variation, allAnnotations.values());
|
||||
return getAnnotations(ref, stratifiedContexts, variation, allAnnotations.values());
|
||||
}
|
||||
|
||||
// option #3: specify the exact annotations to be used
|
||||
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, Collection<VariantAnnotation> annotations) {
|
||||
public static Map<String, String> getAnnotations(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations) {
|
||||
|
||||
HashMap<String, String> results = new HashMap<String, String>();
|
||||
|
||||
// set up the pileup for the full collection of reads at this position
|
||||
ReadBackedPileup fullPileup = context.getPileup();
|
||||
ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads();
|
||||
|
||||
for ( VariantAnnotation annotator : annotations) {
|
||||
String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation);
|
||||
String annot = annotator.annotate(ref, stratifiedContexts, variation);
|
||||
if ( annot != null ) {
|
||||
results.put(annotator.getKeyName(), annot);
|
||||
}
|
||||
|
|
@ -295,12 +292,4 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static Genotype getFirstHetVariant(List<Genotype> genotypes) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.isHet() )
|
||||
return g;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -221,9 +221,9 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(refContext, context, call.first);
|
||||
annotations = VariantAnnotator.getAllAnnotations(refContext, stratifiedContexts, call.first);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(refContext, context, call.first);
|
||||
annotations = VariantAnnotator.getAnnotations(refContext, stratifiedContexts, call.first);
|
||||
((ArbitraryFieldsBacked)call.first).setFields(annotations);
|
||||
}
|
||||
|
||||
|
|
@ -234,12 +234,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
return ( d >= 0.0 && d <= 1.0 );
|
||||
}
|
||||
|
||||
private static AlignmentContext filterAlignmentContext(AlignmentContext context) {
|
||||
return new AlignmentContext(context.getLocation(),
|
||||
context.getPileup().getPileupWithoutMappingQualityZeroReads(),
|
||||
0);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Pair<VariationCall, List<Genotype>> value, Integer sum) {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6c8bdfc9af27c35a88452e23976a891f"));
|
||||
Arrays.asList("1225d977f3e83d362141c1bdb4730016"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("87697664fa1990af331581a5d3491b2c"));
|
||||
Arrays.asList("7234adba82893ebf63338ab20ecc610d"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e9400ca07cdca0254346fc15d53606d1"));
|
||||
Arrays.asList("05d7f1c30130ce13d355200be8c2b31d"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("3380a891eb06e7425d48dc2f2c38ac3b"));
|
||||
Arrays.asList("643fa948531ebbfa013e9eeced76430b"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("12d07018d9604b86b854e0c9132b0608"));
|
||||
Arrays.asList("8f9ee611603dc75c7ca8b15d81000a82"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("65a808f077447f89e001e1cce8433acc"));
|
||||
Arrays.asList("a8979577debf9c8d7ea4a40ee1a0f1ef"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("4890ce37667379a0b60892cb0b9d8e09"));
|
||||
Arrays.asList("253aba7ce0af7be4104dd275873a541d"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("fd5291c7115a558a7a6309ab417d2d57"));
|
||||
Arrays.asList("1be2ce6ac40bdaf3d8f264f6d73c84ae"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("58ddc9426a25d0c3604af8da4eaa8294"));
|
||||
Arrays.asList("8f92a56da4bd56b8755e9db89c31bfff"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("52e7030fe0968cd1a9b95154d8056b3f"));
|
||||
Arrays.asList("67913655a8063f4deb9a6138f0844ddd"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("1ac9c9c8be0eb6adee7c67fac7c19c0d"));
|
||||
Arrays.asList("d51760ad01195ab299925595c6f62bb4"));
|
||||
executeTest("testGenotypeMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testAllBasesModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("37177e194a602749d31e5ecbf80ad309"));
|
||||
Arrays.asList("2322d3ebf09ebf901fbcb653119d0b47"));
|
||||
executeTest("testAllBasesMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue