From 215e908a112fcba5ebb11a29741e361efdda6ab7 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 31 Aug 2009 02:16:39 +0000 Subject: [PATCH] Reworking of the VariantFiltration system to allow for a windowed view of variants and inclusion of more data to the various filters. This now allows us to incorporate both the clustered SNP filter and a SNP-near-indels filter, which otherwise wasn't possible. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1484 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/contexts/VariantContext.java | 83 ++++++++ .../walkers/variants/IVFBinomialStrand.java | 9 +- .../gatk/walkers/variants/IVFNull.java | 4 +- .../walkers/variants/IVFPrimaryBases.java | 10 +- .../walkers/variants/IVFSecondaryBases.java | 10 +- .../variants/IndependentVariantFeature.java | 7 +- .../gatk/walkers/variants/RatioFilter.java | 9 +- .../walkers/variants/VECAlleleBalance.java | 10 +- .../walkers/variants/VECDepthOfCoverage.java | 11 +- .../walkers/variants/VECFisherStrand.java | 7 +- .../gatk/walkers/variants/VECHomopolymer.java | 18 +- .../walkers/variants/VECLodThreshold.java | 8 +- .../walkers/variants/VECMappingQuality.java | 8 +- .../variants/VECMappingQualityZero.java | 9 +- .../gatk/walkers/variants/VECNull.java | 5 +- .../variants/VECOnOffGenotypeRatio.java | 11 +- .../variants/VariantContextWindow.java | 100 +++++++++ .../variants/VariantExclusionCriterion.java | 5 +- .../variants/VariantFiltrationWalker.java | 192 +++++++++--------- 19 files changed, 354 insertions(+), 162 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java new file mode 100755 index 000000000..c2ac37d00 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/contexts/VariantContext.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.contexts; + +import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; + +import net.sf.samtools.SAMRecord; +import java.util.*; + + +public class VariantContext { + private RefMetaDataTracker tracker; + private ReferenceContext ref; + private AlignmentContext context; + private rodVariants variant; + private AlignmentContext Q0freeContext = null; + + public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref, + AlignmentContext context, rodVariants variant) { + this.tracker = tracker; + this.ref = ref; + this.context = context; + this.variant = variant; + } + + public RefMetaDataTracker getTracker() { return tracker; } + public ReferenceContext getReferenceContext() { return ref; } + public rodVariants getVariant() { return variant; } + public AlignmentContext getAlignmentContext() { return getAlignmentContext(false); } + public AlignmentContext getAlignmentContext(boolean useMQ0Reads) { + return (useMQ0Reads ? context : getQ0freeContext()); + } + + private AlignmentContext getQ0freeContext() { + if ( Q0freeContext == null ) { + // set up the variables + List reads = context.getReads(); + List offsets = context.getOffsets(); + Iterator readIter = reads.iterator(); + Iterator offsetIter = offsets.iterator(); + ArrayList Q0freeReads = new ArrayList(); + ArrayList Q0freeOffsets = new ArrayList(); + + // copy over good reads/offsets + while ( readIter.hasNext() ) { + SAMRecord read = readIter.next(); + Integer offset = offsetIter.next(); + if ( read.getMappingQuality() > 0 ) { + Q0freeReads.add(read); + Q0freeOffsets.add(offset); + } + } + + Q0freeContext = new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets); + } + + return Q0freeContext; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java index 8e1a9f7e8..edaf8b250 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFBinomialStrand.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; @@ -18,11 +18,12 @@ public class IVFBinomialStrand implements IndependentVariantFeature { } } - public void compute(char ref, AlignmentContext context) { + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); likelihoods = new double[10]; - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); - List reads = context.getReads(); + ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext()); + List reads = context.getAlignmentContext().getReads(); String bases = pileup.getBases(); for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java index 3986ae35c..586ec6258 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFNull.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - public class IVFNull implements IndependentVariantFeature { public void initialize(String arguments) {} - public void compute(char ref, AlignmentContext context) { + public void compute(VariantContextWindow contextWindow) { } public double[] getLikelihoods() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java index 1f7d07826..68a01310e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFPrimaryBases.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; @@ -29,14 +29,14 @@ public class IVFPrimaryBases implements IndependentVariantFeature { * Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array * of length 10 (one for each genotype) and the value must be in log10-space. * - * @param ref the reference base - * @param context the context for the given locus + * @param contextWindow the context for the given locus * @return a ten-element array of log-likelihood result of the feature applied to each genotype */ - public void compute(char ref, AlignmentContext context) { + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); likelihoods = new double[10]; - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext()); String primaryBases = pileup.getBases(); for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java index f39fe1a74..3c1602025 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IVFSecondaryBases.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; @@ -56,14 +56,14 @@ public class IVFSecondaryBases implements IndependentVariantFeature { * Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array * of length 10 (one for each genotype) and the value must be in log10-space. * - * @param ref the reference base - * @param context the context for the given locus + * @param contextWindow the context for the given locus * @return a ten-element array of log-likelihood result of the feature applied to each genotype */ - public void compute(char ref, AlignmentContext context) { + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); likelihoods = new double[10]; - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext()); String primaryBases = pileup.getBases(); String secondaryBases = pileup.getSecondaryBasePileup(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java index 01cd630c5..75e5b7249 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/IndependentVariantFeature.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - /** * Interface for conditionally independent variant features. */ @@ -23,10 +21,9 @@ public interface IndependentVariantFeature { * Method to compute the result of this feature for each of the ten genotypes. The return value must * be a double array of length 10 (one for each genotype) and the value must be in log10-space. * - * @param ref the reference base - * @param context the context for the given locus + * @param contextWindow the context for the given locus */ - public void compute(char ref, AlignmentContext context); + public void compute(VariantContextWindow contextWindow); public double[] getLikelihoods(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java index bd8089ce3..3ffb30773 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/RatioFilter.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.gatk.refdata.TabularROD; import org.broadinstitute.sting.utils.*; @@ -255,10 +255,13 @@ public abstract class RatioFilter implements VariantExclusionCriterion { public boolean useZeroQualityReads() { return false; } - public void compute(char ref, AlignmentContext context, rodVariants variant) { + public void compute(VariantContextWindow contextWindow) { boolean exclude = false; + VariantContext context = contextWindow.getContext(); + rodVariants variant = context.getVariant(); + char ref = context.getReferenceContext().getBase(); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext()); if ( applyToVariant(variant) ) { Pair counts = scoreVariant(ref, pileup, variant); GenotypeFeatureData gfd = dataTable.get(orderedBases(variant.getBestGenotype())); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java index b00c75f45..72d4030d4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Pair; @@ -52,8 +52,12 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends return new Pair(refCount, altCount); } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + char ref = context.getReferenceContext().getBase(); + rodVariants variant = context.getVariant(); + + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext()); Pair counts = scoreVariant(ref, pileup, variant); int n = counts.first + counts.second; ratio = counts.first.doubleValue() / (double)n; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java index f44aac5d8..6f5e598f4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECDepthOfCoverage.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.contexts.VariantContext; + /** * Created by IntelliJ IDEA. @@ -22,9 +22,10 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion { } } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - exclude = context.getReads().size() > maximum; - depth = context.getReads().size(); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + depth = context.getAlignmentContext().getReads().size(); + exclude = depth > maximum; } public double inclusionProbability() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java index 2d80343d6..587d6cc98 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECFisherStrand.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.gatk.refdata.rodVariants; import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; @@ -22,12 +23,14 @@ public class VECFisherStrand implements VariantExclusionCriterion { factorials.add(0.0); // add fact(0) } - public void compute(char ref, AlignmentContext context, rodVariants variant) { + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + rodVariants variant = context.getVariant(); int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0)); int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1)); if (allele1 != allele2) { - exclude = strandTest(ref, context, allele1, allele2, pvalueLimit, null); + exclude = strandTest(context.getReferenceContext().getBase(), context.getAlignmentContext(), allele1, allele2, pvalueLimit, null); } else { exclude = false; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECHomopolymer.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECHomopolymer.java index 72f363149..bf4afee99 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECHomopolymer.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECHomopolymer.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; @@ -54,22 +54,23 @@ public class VECHomopolymer implements VariantExclusionCriterion { return ""; } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - - char[] geno = variant.getBestGenotype().toCharArray(); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + AlignmentContext alignmentContext = context.getAlignmentContext(); + char[] geno = context.getVariant().getBestGenotype().toCharArray(); int[] genotype = {-1,-1}; for (int h = 0; h < 2; h ++) { genotype[h] = BaseUtils.simpleBaseToBaseIndex(geno[h]); } - if (contig == null || !context.getContig().equals(contig)) { - contig = context.getContig(); + if (contig == null || !alignmentContext.getContig().equals(contig)) { + contig = alignmentContext.getContig(); contigSeq = refSeq.getSequence(contig); } String bases = new String(contigSeq.getBases()); - String prevbases = bases.substring((int) (context.getPosition() - extent - 1), (int) (context.getPosition() - 1)); - String nextbases = bases.substring((int) (context.getPosition() + 1), (int) (context.getPosition() + extent + 1)); + String prevbases = bases.substring((int) (alignmentContext.getPosition() - extent - 1), (int) (alignmentContext.getPosition() - 1)); + String nextbases = bases.substring((int) (alignmentContext.getPosition() + 1), (int) (alignmentContext.getPosition() + extent + 1)); int[] prevCounts = {0,0,0,0}; int[] nextCounts = {0,0,0,0}; @@ -103,6 +104,7 @@ public class VECHomopolymer implements VariantExclusionCriterion { if ((float)(prevMax)/(float)(extent) > frac) {homopolymer[0] = prevMaxBase;} if ((float)(nextMax)/(float)(extent) > frac) {homopolymer[1] = nextMaxBase;} + char ref = context.getReferenceContext().getBase(); boolean checkOne = homopolymer[0] == genotype[0] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref); boolean checkTwo = homopolymer[0] == genotype[1] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref); boolean checkThree = homopolymer[1] == genotype[0] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java index e2dd6928b..9010eb494 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECLodThreshold.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; public class VECLodThreshold implements VariantExclusionCriterion { private double lodThreshold = 5.0; @@ -14,8 +13,9 @@ public class VECLodThreshold implements VariantExclusionCriterion { } } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - lod = variant.getLodBtr(); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + lod = context.getVariant().getLodBtr(); exclude = lod < lodThreshold; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java index f3cecb87d..19d3c6a84 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQuality.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.MathUtils; import net.sf.samtools.SAMRecord; @@ -19,8 +18,9 @@ public class VECMappingQuality implements VariantExclusionCriterion { } } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - List reads = context.getReads(); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + List reads = context.getAlignmentContext().getReads(); int[] qualities = new int[reads.size()]; for (int i=0; i < reads.size(); i++) qualities[i] = reads.get(i).getMappingQuality(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java index 991b456d1..d8fc22ce5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECMappingQualityZero.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import net.sf.samtools.SAMRecord; import java.util.List; @@ -19,8 +17,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion { } } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - List reads = context.getReads(); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + List reads = context.getAlignmentContext(useZeroQualityReads()).getReads(); mq0Count = 0; for (int i=0; i < reads.size(); i++) { if ( reads.get(i).getMappingQuality() == 0 ) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java index 90bab3e83..3889bdcc0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECNull.java @@ -1,13 +1,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; - public class VECNull implements VariantExclusionCriterion { public void initialize(String arguments) { } - public void compute(char ref, AlignmentContext context, rodVariants variant) { + public void compute(VariantContextWindow contextWindow) { } public double inclusionProbability() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java index aaf03f613..6d447ca11 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.refdata.rodVariants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.VariantContext; import org.broadinstitute.sting.utils.*; public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter { @@ -46,9 +46,12 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext return new Pair(on, off); } - public void compute(char ref, AlignmentContext context, rodVariants variant) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); - Pair counts = scoreVariant(ref, pileup, variant); + public void compute(VariantContextWindow contextWindow) { + VariantContext context = contextWindow.getContext(); + char ref = context.getReferenceContext().getBase(); + + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext()); + Pair counts = scoreVariant(ref, pileup, context.getVariant()); int n = counts.first + counts.second; ratio = counts.first.doubleValue() / (double)n; exclude = ratio < threshold; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java new file mode 100755 index 000000000..c72d899b2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantContextWindow.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.variants; + +import org.broadinstitute.sting.gatk.contexts.VariantContext; +import org.broadinstitute.sting.utils.StingException; + +import java.util.*; + +/** + * A window of variants surrounding the current variant being investigated + * + * @author ebanks + * @version 0.1 + */ + +public class VariantContextWindow { + /** + * The variants. + */ + private LinkedList window = new LinkedList(); + private int currentContext; + + /** + * Contructor for a variant context. + * @param firstVariants the first set of variants, comprising the right half of the window + */ + public VariantContextWindow(List firstVariants) { + int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1); + currentContext = (firstVariants == null ? 0 : firstVariants.size()); + window.addAll(firstVariants); + while ( window.size() < windowSize ) + window.addFirst(null); + } + + /** + * The context currently being examined. + * @return The current context. + */ + public VariantContext getContext() { + return window.get(currentContext); + } + + /** + * The maximum number of elements that can be requested on either end of the current context. + * @return max. + */ + public int maxWindowElements() { + return currentContext; + } + + /** + * The window around the context currently being examined. + * @param elementsToLeft number of earlier contexts to return () + * @param elementsToLeft number of later contexts to return () + * @return The current context window. + */ + public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) { + if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() ) + throw new StingException("Too large a window requested"); + + VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1]; + ListIterator iter = window.listIterator(currentContext - elementsToLeft); + for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++) + array[i] = iter.next(); + return array; + } + + /** + * Move the window along to the next context + * @param context The new rightmost context + */ + public void moveWindow(VariantContext context) { + window.removeFirst(); + window.addLast(context); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java index 63a5be1f7..959a27395 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantExclusionCriterion.java @@ -1,12 +1,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.rodVariants; - public interface VariantExclusionCriterion { public void initialize(String arguments); - public void compute(char ref, AlignmentContext context, rodVariants variant); + public void compute(VariantContextWindow contextWindow); //public boolean isExcludable(); public double inclusionProbability(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index 2548be67f..31e19f272 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; @@ -12,7 +11,6 @@ import java.io.FileNotFoundException; import java.io.PrintWriter; import java.util.*; -import net.sf.samtools.SAMRecord; /** * VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants. @@ -38,6 +36,11 @@ public class VariantFiltrationWalker extends LocusWalker { private ArrayList requestedFeatures; private ArrayList requestedExclusions; + // the structures necessary to initialize and maintain a windowed context + private VariantContextWindow variantContextWindow; + private static final int windowSize = 10; // 10 variants on either end of the current one + private ArrayList windowInitializer = new ArrayList(); + /** * Prepare the output file and the list of available features. */ @@ -174,107 +177,103 @@ public class VariantFiltrationWalker extends LocusWalker { */ public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { rodVariants variant = (rodVariants) tracker.lookup("variant", null); - + // Ignore places where we don't have a variant or where the reference base is ambiguous. - if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) { - HashMap exclusionResults = new HashMap(); + if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 ) + return 0; - if (VERBOSE) { out.println("Original:\n" + variant); } + VariantContext varContext = new VariantContext(tracker, ref, context, variant); - paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t"); - - // Apply features that modify the likelihoods and LOD scores - for ( IndependentVariantFeature ivf : requestedFeatures ) { - ivf.compute(ref.getBase(), context); - - double[] weights = ivf.getLikelihoods(); - - variant.adjustLikelihoods(weights); - - if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } - - paramsWriter.print(ivf.getStudyInfo() + "\t"); + // if we're still initializing the context, do so + if ( windowInitializer != null ) { + windowInitializer.add(varContext); + if ( windowInitializer.size() == windowSize ) { + variantContextWindow = new VariantContextWindow(windowInitializer); + windowInitializer = null; } - - // We need to provide an alternative context without mapping quality 0 reads - // for those exclusion criterion that don't want them - AlignmentContext Q0freeContext = removeQ0reads(context); - - // Apply exclusion tests that score the variant call - if (VERBOSE) { - out.print("InclusionProbabilities:["); - } - - // Use the filters to score the variant - double jointInclusionProbability = 1.0; - for ( VariantExclusionCriterion vec : requestedExclusions ) { - vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant); - - String exclusionClassName = rationalizeClassName(vec.getClass()); - - Double inclusionProbability = vec.inclusionProbability(); - jointInclusionProbability *= inclusionProbability; - exclusionResults.put(exclusionClassName, inclusionProbability); - - if (inclusionProbability < INCLUSION_THRESHOLD) { - PrintWriter ewriter = exclusionWriters.get(exclusionClassName); - if (ewriter != null) { - ewriter.println(variant); - ewriter.flush(); - } - } - - if (VERBOSE) { - out.print(exclusionClassName + "=" + inclusionProbability + ";"); - } - - paramsWriter.print(vec.getStudyInfo() + "\t"); - } - - // Decide whether we should keep the call or not - if (jointInclusionProbability >= INCLUSION_THRESHOLD) { - variantsWriter.println(variant); - - if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); } - } else { - if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); } - } - - rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null); - if ( dbsnp == null ) { - paramsWriter.print("false\tfalse\t"); - } else { - paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); - } - - paramsWriter.println(GenotypeUtils.isHet(variant)); - - return 1; + } else { + variantContextWindow.moveWindow(varContext); + compute(); } - return 0; + return 1; } - private AlignmentContext removeQ0reads(AlignmentContext context) { - // set up the variables - List reads = context.getReads(); - List offsets = context.getOffsets(); - Iterator readIter = reads.iterator(); - Iterator offsetIter = offsets.iterator(); - ArrayList Q0freeReads = new ArrayList(); - ArrayList Q0freeOffsets = new ArrayList(); + private void compute() { + // get the current context + VariantContext context = variantContextWindow.getContext(); + if ( context == null ) + return; + rodVariants variant = context.getVariant(); - // copy over good reads/offsets - while ( readIter.hasNext() ) { - SAMRecord read = readIter.next(); - Integer offset = offsetIter.next(); - if ( read.getMappingQuality() > 0 ) { - Q0freeReads.add(read); - Q0freeOffsets.add(offset); - } + HashMap exclusionResults = new HashMap(); + + if (VERBOSE) { out.println("Original:\n" + variant); } + + GenomeLoc loc = context.getAlignmentContext().getLocation(); + paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t"); + + // Apply features that modify the likelihoods and LOD scores + for ( IndependentVariantFeature ivf : requestedFeatures ) { + ivf.compute(variantContextWindow); + + double[] weights = ivf.getLikelihoods(); + + variant.adjustLikelihoods(weights); + + if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } + + paramsWriter.print(ivf.getStudyInfo() + "\t"); } - - return new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets); + + // Apply exclusion tests that score the variant call + if (VERBOSE) { + out.print("InclusionProbabilities:["); + } + + // Use the filters to score the variant + double jointInclusionProbability = 1.0; + for ( VariantExclusionCriterion vec : requestedExclusions ) { + vec.compute(variantContextWindow); + + String exclusionClassName = rationalizeClassName(vec.getClass()); + + Double inclusionProbability = vec.inclusionProbability(); + jointInclusionProbability *= inclusionProbability; + exclusionResults.put(exclusionClassName, inclusionProbability); + + if (inclusionProbability < INCLUSION_THRESHOLD) { + PrintWriter ewriter = exclusionWriters.get(exclusionClassName); + if (ewriter != null) { + ewriter.println(variant); + ewriter.flush(); + } + } + + if (VERBOSE) { + out.print(exclusionClassName + "=" + inclusionProbability + ";"); + } + + paramsWriter.print(vec.getStudyInfo() + "\t"); + } + + // Decide whether we should keep the call or not + if (jointInclusionProbability >= INCLUSION_THRESHOLD) { + variantsWriter.println(variant); + + if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); } + } else { + if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); } + } + + rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null); + if ( dbsnp == null ) { + paramsWriter.print("false\tfalse\t"); + } else { + paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); + } + + paramsWriter.println(GenotypeUtils.isHet(variant)); } /** @@ -294,6 +293,11 @@ public class VariantFiltrationWalker extends LocusWalker { * @param result the number of loci seen. */ public void onTraversalDone(Integer result) { + for (int i=0; i < windowSize; i++) { + variantContextWindow.moveWindow(null); + compute(); + } + out.printf("Processed %d loci.\n", result); variantsWriter.close();