diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/archive/java/src/org/broadinstitute/sting/FisherStrand.java similarity index 100% rename from java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java rename to archive/java/src/org/broadinstitute/sting/FisherStrand.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 4001e5f89..08ea295c3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -1,17 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.List; -import java.util.ArrayList; +import java.util.Map; public class AlleleBalance extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { if ( !(variation instanceof VariantBackedByGenotype) ) return null; @@ -19,121 +19,47 @@ public class AlleleBalance extends StandardVariantAnnotation { if ( genotypes == null || genotypes.size() == 0 ) return null; - double ratio; - Genotype g = genotypes.get(0); - if ( g instanceof ReadBacked && g instanceof PosteriorsBacked ) { - Pair weightedBalance = computeWeightedBalance(ref.getBase(), genotypes, pileup); - if ( weightedBalance.second == 0 ) - return null; - ratio = weightedBalance.first; - } else { - // this test doesn't make sense for homs - Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes); - if ( genotype == null ) - return null; + int refCount = 0; + int altCount = 0; + for ( Genotype genotype : genotypes ) { + // we care only about het calls + if ( !genotype.isHet() ) + continue; + + if ( !(genotype instanceof SampleBacked) ) + continue; + + String sample = ((SampleBacked)genotype).getSampleName(); + StratifiedAlignmentContext context = stratifiedContexts.get(sample); + if ( context == null ) + continue; final String genotypeStr = genotype.getBases().toUpperCase(); if ( genotypeStr.length() != 2 ) return null; - final String bases = new String(pileup.getBases()).toUpperCase(); + final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup().getBases()).toUpperCase(); if ( bases.length() == 0 ) return null; - ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases); + char a = genotypeStr.charAt(0); + char b = genotypeStr.charAt(1); + int aCount = Utils.countOccurrences(a, bases); + int bCount = Utils.countOccurrences(b, bases); + + refCount += a == ref.getBase() ? aCount : bCount; + altCount += a == ref.getBase() ? bCount : aCount; } + // sanity check + if ( refCount + altCount == 0 ) + return null; + + double ratio = (double)refCount / (double)(refCount + altCount); return String.format("%.2f", ratio); } public String getKeyName() { return "AB"; } - public String getDescription() { return "AB,1,Float,\"Allele Balance (ref/(ref+alt))\""; } - - private double computeSingleBalance(char ref, final String genotypeStr, final String bases) { - - char a = genotypeStr.charAt(0); - char b = genotypeStr.charAt(1); - int aCount = Utils.countOccurrences(a, bases); - int bCount = Utils.countOccurrences(b, bases); - - int refCount = a == ref ? aCount : bCount; - int altCount = a == ref ? bCount : aCount; - - double ratio = (double)refCount / (double)(refCount + altCount); - return ratio; - } - - // returns the ratio and then number of points which comprise it - private Pair computeWeightedBalance(char ref, List genotypes, ReadBackedPileup pileup) { - - ArrayList refBalances = new ArrayList(); - ArrayList weights = new ArrayList(); - - // accumulate ratios and weights - for ( Genotype g : genotypes ) { - - if ( !(g instanceof ReadBacked) || !(g instanceof PosteriorsBacked) ) - continue; - - final String genotypeStr = g.getBases().toUpperCase(); - if ( genotypeStr.length() != 2 ) - continue; - - DiploidGenotype bestGenotype = DiploidGenotype.unorderedValueOf(genotypeStr); - - // we care only about het ref calls - if ( !bestGenotype.isHetRef(ref) ) - continue; - - char a = genotypeStr.charAt(0); - char b = genotypeStr.charAt(1); - char altBase = a != ref ? a : b; - - // get the base counts at this pileup (minus deletions) - ReadBackedPileup myPileup = ((ReadBacked)g).getPileup(); - - // if the pileup is null, we'll just have to use the full pileup (it's better than nothing) - if ( myPileup == null ) - myPileup = pileup; - - int[] counts = myPileup.getBaseCounts(); - int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)]; - int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)]; - - // sanity check - if ( refCount + altCount == 0 ) - continue; - - double[] posteriors = ((PosteriorsBacked)g).getPosteriors(); - posteriors = MathUtils.normalizeFromLog10(posteriors); - double weight = posteriors[bestGenotype.ordinal()]; - - // sanity check - if ( MathUtils.compareDoubles(weight, 0.0) == 0 ) - continue; - - weights.add(weight); - refBalances.add((double)refCount / (double)(refCount + altCount)); - } - - double ratio = 0.0; - - if ( weights.size() > 0 ) { - // normalize the weights - double sum = 0.0; - for (int i = 0; i < weights.size(); i++) - sum += weights.get(i); - for (int i = 0; i < weights.size(); i++) - weights.set(i, weights.get(i) / sum); - - // calculate total weighted ratios - for (int i = 0; i < weights.size(); i++) - ratio += weights.get(i) * refBalances.get(i); - } - - return new Pair(ratio, weights.size()); - } - - public boolean useZeroQualityReads() { return false; } + public String getDescription() { return "AB,1,Float,\"Allele Balance for hets (ref/(ref+alt))\""; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 76f3aa852..2bd10b410 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,21 +1,23 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import java.util.Map; + public class DepthOfCoverage extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - int depth = pileup.getReads().size(); + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + int depth = 0; + for ( String sample : stratifiedContexts.keySet() ) + depth += stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup().size(); return String.format("%d", depth); } public String getKeyName() { return VCFRecord.DEPTH_KEY; } public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; } - - public boolean useZeroQualityReads() { return true; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index f8a27ce62..12695d852 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,14 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.genotype.Variation; +import java.util.Map; + public class HomopolymerRun extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { if ( !variation.isBiallelic() || !variation.isSNP() ) return null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 9b19a104a..f5f8f8621 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,27 +1,35 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import java.util.Map; +import java.util.ArrayList; + public class RMSMappingQuality extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - int[] qualities = new int[pileup.size()]; + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + ArrayList qualities = new ArrayList(); + for ( String sample : stratifiedContexts.keySet() ) { + ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + for (PileupElement p : pileup ) + qualities.add(p.getRead().getMappingQuality()); + } + int[] quals = new int[qualities.size()]; int index = 0; - for (PileupElement p : pileup ) - qualities[index++] = p.getRead().getMappingQuality(); - double rms = MathUtils.rms(qualities); + for ( Integer i : qualities ) + quals[index++] = i; + double rms = MathUtils.rms(quals); return String.format("%.2f", rms); } public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; } public String getDescription() { return getKeyName() + ",1,Float,\"RMS Mapping Quality\""; } - - public boolean useZeroQualityReads() { return true; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index d245e82e2..702cf1e67 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -8,13 +9,14 @@ import org.broadinstitute.sting.utils.genotype.*; import java.util.List; import java.util.ArrayList; +import java.util.Map; public class RankSumTest implements VariantAnnotation { private final static boolean DEBUG = false; private static final double minPValue = 1e-10; - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { if ( !(variation instanceof VariantBackedByGenotype) ) return null; @@ -22,18 +24,20 @@ public class RankSumTest implements VariantAnnotation { if ( genotypes == null || genotypes.size() == 0 ) return null; - // this test doesn't make sense for homs - Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes); - if ( genotype == null ) - return null; - ArrayList refQuals = new ArrayList(); ArrayList altQuals = new ArrayList(); - if ( genotype instanceof ReadBacked && ((ReadBacked)genotype).getPileup() != null ) - fillQualsFromGenotypes(ref.getBase(), variation.getAlternativeBaseForSNP(), genotypes, refQuals, altQuals); - else - fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), pileup, refQuals, altQuals); + for ( Genotype genotype : genotypes ) { + // we care only about het calls + if ( genotype.isHet() ) { + String sample = ((SampleBacked)genotype).getSampleName(); + StratifiedAlignmentContext context = stratifiedContexts.get(sample); + if ( context == null ) + continue; + + fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), refQuals, altQuals); + } + } WilcoxonRankSum wilcoxon = new WilcoxonRankSum(); for ( Integer qual : altQuals ) @@ -44,7 +48,7 @@ public class RankSumTest implements VariantAnnotation { // for R debugging if ( DEBUG ) { wilcoxon.DEBUG = DEBUG; - System.out.printf("%s%n", pileup.getLocation()); + System.out.printf("%s%n", ref.getLocus()); System.out.printf("alt <- c(%s)%n", Utils.join(",", altQuals)); System.out.printf("ref <- c(%s)%n", Utils.join(",", refQuals)); } @@ -78,20 +82,4 @@ public class RankSumTest implements VariantAnnotation { altQuals.add((int)p.getQual()); } } - - private void fillQualsFromGenotypes(char ref, char alt, List genotypes, List refQuals, List altQuals) { - // accumulate quals - for ( Genotype g : genotypes ) { - if ( !(g instanceof ReadBacked) ) - continue; - - ReadBackedPileup pileup = ((ReadBacked)g).getPileup(); - if ( pileup == null ) - continue; - - fillQualsFromPileup(ref, alt, pileup, refQuals, altQuals); - } - } - - public boolean useZeroQualityReads() { return false; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 20cec5344..a07c08e9c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -6,6 +6,9 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; + +import java.util.Map; /** @@ -17,35 +20,39 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; */ public class SecondBaseSkew implements VariantAnnotation { private final static double epsilon = Math.pow(10.0,-12); - private final static boolean USE_ZERO_QUALITY_READS = false; // todo -- should be false in my opinion MAD private final static String KEY_NAME = "2b_Chi"; private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3, 2.0/3}; private double[] proportionExpectations = UNIFORM_ON_OFF_RATIO; - public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; } - public String getKeyName() { return KEY_NAME; } public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; } - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - if ( variation.isSNP() && variation.isBiallelic() ) { - char snp = variation.getAlternativeBaseForSNP(); - Pair depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup, snp); - if ( depthProp == null ) { - return null; - } else { - //System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond()); - double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst()); - double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst()); - // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); - // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); - double chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); - return String.format("%f", chi_square); - } - } else { + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + if ( !variation.isBiallelic() || !variation.isSNP() ) return null; + + char snp = variation.getAlternativeBaseForSNP(); + + Pair depth = new Pair(0, 0); + for ( String sample : stratifiedContexts.keySet() ) { + Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), snp); + depth.first += sampleDepth.first; + depth.second += sampleDepth.second; } + + if ( depth.first == 0 ) + return null; + + double biasedProportion = (1.0 + depth.second) / (1.0 + depth.first); + + //System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond()); + double p_transformed = transform(biasedProportion, depth.first+1); + double expected_transformed = transform(proportionExpectations[0], depth.first+1); + // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); + // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); + double chi_square = Math.signum(biasedProportion - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); + return String.format("%f", chi_square); } private double transform( double proportion, int depth ) { @@ -53,7 +60,7 @@ public class SecondBaseSkew implements VariantAnnotation { return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) ); } - private Pair getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, char snp ) { + private Pair getSecondaryPileupNonrefCount(char ref, ReadBackedPileup p, char snp ) { int variantDepth = 0; int variantsWithRefSecondBase = 0; @@ -69,11 +76,6 @@ public class SecondBaseSkew implements VariantAnnotation { } } - if ( variantDepth > 0 ) { - double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth ); - return new Pair(variantDepth+1, biasedProportion); - } else { - return null; - } + return new Pair(variantDepth, variantsWithRefSecondBase); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 0fb09a693..0ed37421a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,20 +1,27 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Variation; +import java.util.Map; + public class SpanningDeletions extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - int deletions = pileup.getNumberOfDeletions(); - return String.format("%.2f", (double)deletions/(double)pileup.size()); + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + int deletions = 0; + int depth = 0; + for ( String sample : stratifiedContexts.keySet() ) { + ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); + deletions += pileup.getNumberOfDeletions(); + depth += pileup.size(); + } + return String.format("%.2f", (double)deletions/(double)depth); } public String getKeyName() { return "Dels"; } public String getDescription() { return "Dels,1,Float,\"Fraction of Reads Containing Spanning Deletions\""; } - - public boolean useZeroQualityReads() { return false; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java index 4ba6c6ba8..86c2c9702 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.genotype.Variation; +import java.util.Map; + public interface VariantAnnotation { - // return the annotation for the given locus data (return null for no annotation) - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation); - - // return true if you want to use a context with mapping quality zero reads, false otherwise - public boolean useZeroQualityReads(); + // return the annotation for the given variation and context split by sample (return null for no annotation) + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation); // return the INFO key public String getKeyName(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 204acc37a..31d3d7c12 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.Genotype; @@ -165,9 +164,11 @@ public class VariantAnnotator extends RodWalker { // if the reference base is not ambiguous, the variant is a SNP, and it's the appropriate type, we can annotate if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && variant.isBiallelic() && - variant.isSNP() ) - annotations = getAnnotations(ref, context, variant, requestedAnnotations); - + variant.isSNP() ) { + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, null, null); + if ( stratifiedContexts != null ) + annotations = getAnnotations(ref, stratifiedContexts, variant, requestedAnnotations); + } writeVCF(tracker, ref, context, variant, annotations); return 1; @@ -208,30 +209,26 @@ public class VariantAnnotator extends RodWalker { } // option #1: don't specify annotations to be used: standard annotations are used by default - public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) { + public static Map getAnnotations(ReferenceContext ref, Map stratifiedContexts, Variation variation) { if ( standardAnnotations == null ) determineAllAnnotations(); - return getAnnotations(ref, context, variation, standardAnnotations.values()); + return getAnnotations(ref, stratifiedContexts, variation, standardAnnotations.values()); } // option #2: specify that all possible annotations be used - public static Map getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) { + public static Map getAllAnnotations(ReferenceContext ref, Map stratifiedContexts, Variation variation) { if ( allAnnotations == null ) determineAllAnnotations(); - return getAnnotations(ref, context, variation, allAnnotations.values()); + return getAnnotations(ref, stratifiedContexts, variation, allAnnotations.values()); } // option #3: specify the exact annotations to be used - public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, Collection annotations) { + public static Map getAnnotations(ReferenceContext ref, Map stratifiedContexts, Variation variation, Collection annotations) { HashMap results = new HashMap(); - // set up the pileup for the full collection of reads at this position - ReadBackedPileup fullPileup = context.getPileup(); - ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads(); - for ( VariantAnnotation annotator : annotations) { - String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation); + String annot = annotator.annotate(ref, stratifiedContexts, variation); if ( annot != null ) { results.put(annotator.getKeyName(), annot); } @@ -295,12 +292,4 @@ public class VariantAnnotator extends RodWalker { } return null; } - - public static Genotype getFirstHetVariant(List genotypes) { - for ( Genotype g : genotypes ) { - if ( g.isHet() ) - return g; - } - return null; - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 2bf421644..16237feeb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -221,9 +221,9 @@ public class UnifiedGenotyper extends LocusWalker annotations; if ( UAC.ALL_ANNOTATIONS ) - annotations = VariantAnnotator.getAllAnnotations(refContext, context, call.first); + annotations = VariantAnnotator.getAllAnnotations(refContext, stratifiedContexts, call.first); else - annotations = VariantAnnotator.getAnnotations(refContext, context, call.first); + annotations = VariantAnnotator.getAnnotations(refContext, stratifiedContexts, call.first); ((ArbitraryFieldsBacked)call.first).setFields(annotations); } @@ -234,12 +234,6 @@ public class UnifiedGenotyper extends LocusWalker= 0.0 && d <= 1.0 ); } - private static AlignmentContext filterAlignmentContext(AlignmentContext context) { - return new AlignmentContext(context.getLocation(), - context.getPileup().getPileupWithoutMappingQualityZeroReads(), - 0); - } - public Integer reduceInit() { return 0; } public Integer reduce(Pair> value, Integer sum) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 247c242cf..90284a1c9 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("6c8bdfc9af27c35a88452e23976a891f")); + Arrays.asList("1225d977f3e83d362141c1bdb4730016")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("87697664fa1990af331581a5d3491b2c")); + Arrays.asList("7234adba82893ebf63338ab20ecc610d")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e9400ca07cdca0254346fc15d53606d1")); + Arrays.asList("05d7f1c30130ce13d355200be8c2b31d")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("3380a891eb06e7425d48dc2f2c38ac3b")); + Arrays.asList("643fa948531ebbfa013e9eeced76430b")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 3f137fdf1..5dd7dbc4b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("12d07018d9604b86b854e0c9132b0608")); + Arrays.asList("8f9ee611603dc75c7ca8b15d81000a82")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("65a808f077447f89e001e1cce8433acc")); + Arrays.asList("a8979577debf9c8d7ea4a40ee1a0f1ef")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("4890ce37667379a0b60892cb0b9d8e09")); + Arrays.asList("253aba7ce0af7be4104dd275873a541d")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("fd5291c7115a558a7a6309ab417d2d57")); + Arrays.asList("1be2ce6ac40bdaf3d8f264f6d73c84ae")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("58ddc9426a25d0c3604af8da4eaa8294")); + Arrays.asList("8f92a56da4bd56b8755e9db89c31bfff")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("52e7030fe0968cd1a9b95154d8056b3f")); + Arrays.asList("67913655a8063f4deb9a6138f0844ddd")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testGenotypeModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("1ac9c9c8be0eb6adee7c67fac7c19c0d")); + Arrays.asList("d51760ad01195ab299925595c6f62bb4")); executeTest("testGenotypeMode - Joint Estimate", spec); } @@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testAllBasesModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("37177e194a602749d31e5ecbf80ad309")); + Arrays.asList("2322d3ebf09ebf901fbcb653119d0b47")); executeTest("testAllBasesMode - Joint Estimate", spec); }