Reworking of the VariantFiltration system to allow for a windowed view of variants and inclusion of more data to the various filters.
This now allows us to incorporate both the clustered SNP filter and a SNP-near-indels filter, which otherwise wasn't possible. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1484 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2402dcd4c9
commit
215e908a11
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantContext {
|
||||
private RefMetaDataTracker tracker;
|
||||
private ReferenceContext ref;
|
||||
private AlignmentContext context;
|
||||
private rodVariants variant;
|
||||
private AlignmentContext Q0freeContext = null;
|
||||
|
||||
public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref,
|
||||
AlignmentContext context, rodVariants variant) {
|
||||
this.tracker = tracker;
|
||||
this.ref = ref;
|
||||
this.context = context;
|
||||
this.variant = variant;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() { return tracker; }
|
||||
public ReferenceContext getReferenceContext() { return ref; }
|
||||
public rodVariants getVariant() { return variant; }
|
||||
public AlignmentContext getAlignmentContext() { return getAlignmentContext(false); }
|
||||
public AlignmentContext getAlignmentContext(boolean useMQ0Reads) {
|
||||
return (useMQ0Reads ? context : getQ0freeContext());
|
||||
}
|
||||
|
||||
private AlignmentContext getQ0freeContext() {
|
||||
if ( Q0freeContext == null ) {
|
||||
// set up the variables
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
Iterator<SAMRecord> readIter = reads.iterator();
|
||||
Iterator<Integer> offsetIter = offsets.iterator();
|
||||
ArrayList<SAMRecord> Q0freeReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> Q0freeOffsets = new ArrayList<Integer>();
|
||||
|
||||
// copy over good reads/offsets
|
||||
while ( readIter.hasNext() ) {
|
||||
SAMRecord read = readIter.next();
|
||||
Integer offset = offsetIter.next();
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
Q0freeReads.add(read);
|
||||
Q0freeOffsets.add(offset);
|
||||
}
|
||||
}
|
||||
|
||||
Q0freeContext = new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets);
|
||||
}
|
||||
|
||||
return Q0freeContext;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
||||
|
|
@ -18,11 +18,12 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
|
|||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
|
||||
List<SAMRecord> reads = context.getAlignmentContext().getReads();
|
||||
String bases = pileup.getBases();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
public class IVFNull implements IndependentVariantFeature {
|
||||
public void initialize(String arguments) {}
|
||||
|
||||
public void compute(char ref, AlignmentContext context) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
||||
|
|
@ -29,14 +29,14 @@ public class IVFPrimaryBases implements IndependentVariantFeature {
|
|||
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
|
||||
* of length 10 (one for each genotype) and the value must be in log10-space.
|
||||
*
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @param contextWindow the context for the given locus
|
||||
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
||||
*/
|
||||
public void compute(char ref, AlignmentContext context) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
|
||||
String primaryBases = pileup.getBases();
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
||||
|
|
@ -56,14 +56,14 @@ public class IVFSecondaryBases implements IndependentVariantFeature {
|
|||
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
|
||||
* of length 10 (one for each genotype) and the value must be in log10-space.
|
||||
*
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @param contextWindow the context for the given locus
|
||||
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
||||
*/
|
||||
public void compute(char ref, AlignmentContext context) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
likelihoods = new double[10];
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
|
||||
String primaryBases = pileup.getBases();
|
||||
String secondaryBases = pileup.getSecondaryBasePileup();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
/**
|
||||
* Interface for conditionally independent variant features.
|
||||
*/
|
||||
|
|
@ -23,10 +21,9 @@ public interface IndependentVariantFeature {
|
|||
* Method to compute the result of this feature for each of the ten genotypes. The return value must
|
||||
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
|
||||
*
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @param contextWindow the context for the given locus
|
||||
*/
|
||||
public void compute(char ref, AlignmentContext context);
|
||||
public void compute(VariantContextWindow contextWindow);
|
||||
|
||||
public double[] getLikelihoods();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -255,10 +255,13 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
|
|||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
boolean exclude = false;
|
||||
VariantContext context = contextWindow.getContext();
|
||||
rodVariants variant = context.getVariant();
|
||||
char ref = context.getReferenceContext().getBase();
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
|
||||
if ( applyToVariant(variant) ) {
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
GenotypeFeatureData gfd = dataTable.get(orderedBases(variant.getBestGenotype()));
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
|
@ -52,8 +52,12 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
|||
return new Pair(refCount, altCount);
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
char ref = context.getReferenceContext().getBase();
|
||||
rodVariants variant = context.getVariant();
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
int n = counts.first + counts.second;
|
||||
ratio = counts.first.doubleValue() / (double)n;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -22,9 +22,10 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
|||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
exclude = context.getReads().size() > maximum;
|
||||
depth = context.getReads().size();
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
depth = context.getAlignmentContext().getReads().size();
|
||||
exclude = depth > maximum;
|
||||
}
|
||||
|
||||
public double inclusionProbability() {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -22,12 +23,14 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
factorials.add(0.0); // add fact(0)
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
rodVariants variant = context.getVariant();
|
||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0));
|
||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
|
||||
|
||||
if (allele1 != allele2) {
|
||||
exclude = strandTest(ref, context, allele1, allele2, pvalueLimit, null);
|
||||
exclude = strandTest(context.getReferenceContext().getBase(), context.getAlignmentContext(), allele1, allele2, pvalueLimit, null);
|
||||
} else {
|
||||
exclude = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
|
|
@ -54,22 +54,23 @@ public class VECHomopolymer implements VariantExclusionCriterion {
|
|||
return "";
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
|
||||
char[] geno = variant.getBestGenotype().toCharArray();
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
AlignmentContext alignmentContext = context.getAlignmentContext();
|
||||
char[] geno = context.getVariant().getBestGenotype().toCharArray();
|
||||
int[] genotype = {-1,-1};
|
||||
for (int h = 0; h < 2; h ++) {
|
||||
genotype[h] = BaseUtils.simpleBaseToBaseIndex(geno[h]);
|
||||
}
|
||||
|
||||
if (contig == null || !context.getContig().equals(contig)) {
|
||||
contig = context.getContig();
|
||||
if (contig == null || !alignmentContext.getContig().equals(contig)) {
|
||||
contig = alignmentContext.getContig();
|
||||
contigSeq = refSeq.getSequence(contig);
|
||||
}
|
||||
|
||||
String bases = new String(contigSeq.getBases());
|
||||
String prevbases = bases.substring((int) (context.getPosition() - extent - 1), (int) (context.getPosition() - 1));
|
||||
String nextbases = bases.substring((int) (context.getPosition() + 1), (int) (context.getPosition() + extent + 1));
|
||||
String prevbases = bases.substring((int) (alignmentContext.getPosition() - extent - 1), (int) (alignmentContext.getPosition() - 1));
|
||||
String nextbases = bases.substring((int) (alignmentContext.getPosition() + 1), (int) (alignmentContext.getPosition() + extent + 1));
|
||||
|
||||
int[] prevCounts = {0,0,0,0};
|
||||
int[] nextCounts = {0,0,0,0};
|
||||
|
|
@ -103,6 +104,7 @@ public class VECHomopolymer implements VariantExclusionCriterion {
|
|||
if ((float)(prevMax)/(float)(extent) > frac) {homopolymer[0] = prevMaxBase;}
|
||||
if ((float)(nextMax)/(float)(extent) > frac) {homopolymer[1] = nextMaxBase;}
|
||||
|
||||
char ref = context.getReferenceContext().getBase();
|
||||
boolean checkOne = homopolymer[0] == genotype[0] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
boolean checkTwo = homopolymer[0] == genotype[1] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
boolean checkThree = homopolymer[1] == genotype[0] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
|
||||
public class VECLodThreshold implements VariantExclusionCriterion {
|
||||
private double lodThreshold = 5.0;
|
||||
|
|
@ -14,8 +13,9 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
|||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
lod = variant.getLodBtr();
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
lod = context.getVariant().getLodBtr();
|
||||
exclude = lod < lodThreshold;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -19,8 +18,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
|
|||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
List<SAMRecord> reads = context.getAlignmentContext().getReads();
|
||||
int[] qualities = new int[reads.size()];
|
||||
for (int i=0; i < reads.size(); i++)
|
||||
qualities[i] = reads.get(i).getMappingQuality();
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -19,8 +17,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
|
|||
}
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
|
||||
mq0Count = 0;
|
||||
for (int i=0; i < reads.size(); i++) {
|
||||
if ( reads.get(i).getMappingQuality() == 0 )
|
||||
|
|
|
|||
|
|
@ -1,13 +1,10 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
|
||||
public class VECNull implements VariantExclusionCriterion {
|
||||
public void initialize(String arguments) {
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
}
|
||||
|
||||
public double inclusionProbability() {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
|
||||
|
|
@ -46,9 +46,12 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
|
|||
return new Pair<Integer, Integer>(on, off);
|
||||
}
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
public void compute(VariantContextWindow contextWindow) {
|
||||
VariantContext context = contextWindow.getContext();
|
||||
char ref = context.getReferenceContext().getBase();
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, context.getVariant());
|
||||
int n = counts.first + counts.second;
|
||||
ratio = counts.first.doubleValue() / (double)n;
|
||||
exclude = ratio < threshold;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A window of variants surrounding the current variant being investigated
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class VariantContextWindow {
|
||||
/**
|
||||
* The variants.
|
||||
*/
|
||||
private LinkedList<VariantContext> window = new LinkedList<VariantContext>();
|
||||
private int currentContext;
|
||||
|
||||
/**
|
||||
* Contructor for a variant context.
|
||||
* @param firstVariants the first set of variants, comprising the right half of the window
|
||||
*/
|
||||
public VariantContextWindow(List<VariantContext> firstVariants) {
|
||||
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
|
||||
currentContext = (firstVariants == null ? 0 : firstVariants.size());
|
||||
window.addAll(firstVariants);
|
||||
while ( window.size() < windowSize )
|
||||
window.addFirst(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* The context currently being examined.
|
||||
* @return The current context.
|
||||
*/
|
||||
public VariantContext getContext() {
|
||||
return window.get(currentContext);
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum number of elements that can be requested on either end of the current context.
|
||||
* @return max.
|
||||
*/
|
||||
public int maxWindowElements() {
|
||||
return currentContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* The window around the context currently being examined.
|
||||
* @param elementsToLeft number of earlier contexts to return ()
|
||||
* @param elementsToLeft number of later contexts to return ()
|
||||
* @return The current context window.
|
||||
*/
|
||||
public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) {
|
||||
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
||||
throw new StingException("Too large a window requested");
|
||||
|
||||
VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1];
|
||||
ListIterator<VariantContext> iter = window.listIterator(currentContext - elementsToLeft);
|
||||
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
|
||||
array[i] = iter.next();
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move the window along to the next context
|
||||
* @param context The new rightmost context
|
||||
*/
|
||||
public void moveWindow(VariantContext context) {
|
||||
window.removeFirst();
|
||||
window.addLast(context);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,12 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodVariants;
|
||||
|
||||
public interface VariantExclusionCriterion {
|
||||
public void initialize(String arguments);
|
||||
|
||||
public void compute(char ref, AlignmentContext context, rodVariants variant);
|
||||
public void compute(VariantContextWindow contextWindow);
|
||||
|
||||
//public boolean isExcludable();
|
||||
public double inclusionProbability();
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -12,7 +11,6 @@ import java.io.FileNotFoundException;
|
|||
import java.io.PrintWriter;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants.
|
||||
|
|
@ -38,6 +36,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
||||
|
||||
// the structures necessary to initialize and maintain a windowed context
|
||||
private VariantContextWindow variantContextWindow;
|
||||
private static final int windowSize = 10; // 10 variants on either end of the current one
|
||||
private ArrayList<VariantContext> windowInitializer = new ArrayList<VariantContext>();
|
||||
|
||||
/**
|
||||
* Prepare the output file and the list of available features.
|
||||
*/
|
||||
|
|
@ -174,107 +177,103 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
|
||||
|
||||
|
||||
// Ignore places where we don't have a variant or where the reference base is ambiguous.
|
||||
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
|
||||
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
||||
if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 )
|
||||
return 0;
|
||||
|
||||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||
VariantContext varContext = new VariantContext(tracker, ref, context, variant);
|
||||
|
||||
paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
|
||||
|
||||
// Apply features that modify the likelihoods and LOD scores
|
||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||
ivf.compute(ref.getBase(), context);
|
||||
|
||||
double[] weights = ivf.getLikelihoods();
|
||||
|
||||
variant.adjustLikelihoods(weights);
|
||||
|
||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||
|
||||
paramsWriter.print(ivf.getStudyInfo() + "\t");
|
||||
// if we're still initializing the context, do so
|
||||
if ( windowInitializer != null ) {
|
||||
windowInitializer.add(varContext);
|
||||
if ( windowInitializer.size() == windowSize ) {
|
||||
variantContextWindow = new VariantContextWindow(windowInitializer);
|
||||
windowInitializer = null;
|
||||
}
|
||||
|
||||
// We need to provide an alternative context without mapping quality 0 reads
|
||||
// for those exclusion criterion that don't want them
|
||||
AlignmentContext Q0freeContext = removeQ0reads(context);
|
||||
|
||||
// Apply exclusion tests that score the variant call
|
||||
if (VERBOSE) {
|
||||
out.print("InclusionProbabilities:[");
|
||||
}
|
||||
|
||||
// Use the filters to score the variant
|
||||
double jointInclusionProbability = 1.0;
|
||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
|
||||
|
||||
String exclusionClassName = rationalizeClassName(vec.getClass());
|
||||
|
||||
Double inclusionProbability = vec.inclusionProbability();
|
||||
jointInclusionProbability *= inclusionProbability;
|
||||
exclusionResults.put(exclusionClassName, inclusionProbability);
|
||||
|
||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
|
||||
if (ewriter != null) {
|
||||
ewriter.println(variant);
|
||||
ewriter.flush();
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||
}
|
||||
|
||||
paramsWriter.print(vec.getStudyInfo() + "\t");
|
||||
}
|
||||
|
||||
// Decide whether we should keep the call or not
|
||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||
variantsWriter.println(variant);
|
||||
|
||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||
} else {
|
||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
|
||||
}
|
||||
|
||||
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null);
|
||||
if ( dbsnp == null ) {
|
||||
paramsWriter.print("false\tfalse\t");
|
||||
} else {
|
||||
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||
}
|
||||
|
||||
paramsWriter.println(GenotypeUtils.isHet(variant));
|
||||
|
||||
return 1;
|
||||
} else {
|
||||
variantContextWindow.moveWindow(varContext);
|
||||
compute();
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
private AlignmentContext removeQ0reads(AlignmentContext context) {
|
||||
// set up the variables
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
Iterator<SAMRecord> readIter = reads.iterator();
|
||||
Iterator<Integer> offsetIter = offsets.iterator();
|
||||
ArrayList<SAMRecord> Q0freeReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> Q0freeOffsets = new ArrayList<Integer>();
|
||||
private void compute() {
|
||||
// get the current context
|
||||
VariantContext context = variantContextWindow.getContext();
|
||||
if ( context == null )
|
||||
return;
|
||||
rodVariants variant = context.getVariant();
|
||||
|
||||
// copy over good reads/offsets
|
||||
while ( readIter.hasNext() ) {
|
||||
SAMRecord read = readIter.next();
|
||||
Integer offset = offsetIter.next();
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
Q0freeReads.add(read);
|
||||
Q0freeOffsets.add(offset);
|
||||
}
|
||||
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
||||
|
||||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||
|
||||
GenomeLoc loc = context.getAlignmentContext().getLocation();
|
||||
paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
||||
|
||||
// Apply features that modify the likelihoods and LOD scores
|
||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||
ivf.compute(variantContextWindow);
|
||||
|
||||
double[] weights = ivf.getLikelihoods();
|
||||
|
||||
variant.adjustLikelihoods(weights);
|
||||
|
||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||
|
||||
paramsWriter.print(ivf.getStudyInfo() + "\t");
|
||||
}
|
||||
|
||||
return new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets);
|
||||
|
||||
// Apply exclusion tests that score the variant call
|
||||
if (VERBOSE) {
|
||||
out.print("InclusionProbabilities:[");
|
||||
}
|
||||
|
||||
// Use the filters to score the variant
|
||||
double jointInclusionProbability = 1.0;
|
||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||
vec.compute(variantContextWindow);
|
||||
|
||||
String exclusionClassName = rationalizeClassName(vec.getClass());
|
||||
|
||||
Double inclusionProbability = vec.inclusionProbability();
|
||||
jointInclusionProbability *= inclusionProbability;
|
||||
exclusionResults.put(exclusionClassName, inclusionProbability);
|
||||
|
||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
|
||||
if (ewriter != null) {
|
||||
ewriter.println(variant);
|
||||
ewriter.flush();
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||
}
|
||||
|
||||
paramsWriter.print(vec.getStudyInfo() + "\t");
|
||||
}
|
||||
|
||||
// Decide whether we should keep the call or not
|
||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||
variantsWriter.println(variant);
|
||||
|
||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||
} else {
|
||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
|
||||
}
|
||||
|
||||
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
||||
if ( dbsnp == null ) {
|
||||
paramsWriter.print("false\tfalse\t");
|
||||
} else {
|
||||
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||
}
|
||||
|
||||
paramsWriter.println(GenotypeUtils.isHet(variant));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -294,6 +293,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
* @param result the number of loci seen.
|
||||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
for (int i=0; i < windowSize; i++) {
|
||||
variantContextWindow.moveWindow(null);
|
||||
compute();
|
||||
}
|
||||
|
||||
out.printf("Processed %d loci.\n", result);
|
||||
|
||||
variantsWriter.close();
|
||||
|
|
|
|||
Loading…
Reference in New Issue