Reworking of the VariantFiltration system to allow for a windowed view of variants and inclusion of more data to the various filters.

This now allows us to incorporate both the clustered SNP filter and a SNP-near-indels filter, which otherwise wasn't possible.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1484 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-08-31 02:16:39 +00:00
parent 2402dcd4c9
commit 215e908a11
19 changed files with 354 additions and 162 deletions

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import net.sf.samtools.SAMRecord;
import java.util.*;
public class VariantContext {
private RefMetaDataTracker tracker;
private ReferenceContext ref;
private AlignmentContext context;
private rodVariants variant;
private AlignmentContext Q0freeContext = null;
public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref,
AlignmentContext context, rodVariants variant) {
this.tracker = tracker;
this.ref = ref;
this.context = context;
this.variant = variant;
}
public RefMetaDataTracker getTracker() { return tracker; }
public ReferenceContext getReferenceContext() { return ref; }
public rodVariants getVariant() { return variant; }
public AlignmentContext getAlignmentContext() { return getAlignmentContext(false); }
public AlignmentContext getAlignmentContext(boolean useMQ0Reads) {
return (useMQ0Reads ? context : getQ0freeContext());
}
private AlignmentContext getQ0freeContext() {
if ( Q0freeContext == null ) {
// set up the variables
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
Iterator<SAMRecord> readIter = reads.iterator();
Iterator<Integer> offsetIter = offsets.iterator();
ArrayList<SAMRecord> Q0freeReads = new ArrayList<SAMRecord>();
ArrayList<Integer> Q0freeOffsets = new ArrayList<Integer>();
// copy over good reads/offsets
while ( readIter.hasNext() ) {
SAMRecord read = readIter.next();
Integer offset = offsetIter.next();
if ( read.getMappingQuality() > 0 ) {
Q0freeReads.add(read);
Q0freeOffsets.add(offset);
}
}
Q0freeContext = new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets);
}
return Q0freeContext;
}
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -18,11 +18,12 @@ public class IVFBinomialStrand implements IndependentVariantFeature {
}
}
public void compute(char ref, AlignmentContext context) {
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
List<SAMRecord> reads = context.getReads();
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
List<SAMRecord> reads = context.getAlignmentContext().getReads();
String bases = pileup.getBases();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
public class IVFNull implements IndependentVariantFeature {
public void initialize(String arguments) {}
public void compute(char ref, AlignmentContext context) {
public void compute(VariantContextWindow contextWindow) {
}
public double[] getLikelihoods() {

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -29,14 +29,14 @@ public class IVFPrimaryBases implements IndependentVariantFeature {
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
* of length 10 (one for each genotype) and the value must be in log10-space.
*
* @param ref the reference base
* @param context the context for the given locus
* @param contextWindow the context for the given locus
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
*/
public void compute(char ref, AlignmentContext context) {
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
String primaryBases = pileup.getBases();
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -56,14 +56,14 @@ public class IVFSecondaryBases implements IndependentVariantFeature {
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
* of length 10 (one for each genotype) and the value must be in log10-space.
*
* @param ref the reference base
* @param context the context for the given locus
* @param contextWindow the context for the given locus
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
*/
public void compute(char ref, AlignmentContext context) {
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
likelihoods = new double[10];
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext());
String primaryBases = pileup.getBases();
String secondaryBases = pileup.getSecondaryBasePileup();

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
/**
* Interface for conditionally independent variant features.
*/
@ -23,10 +21,9 @@ public interface IndependentVariantFeature {
* Method to compute the result of this feature for each of the ten genotypes. The return value must
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
*
* @param ref the reference base
* @param context the context for the given locus
* @param contextWindow the context for the given locus
*/
public void compute(char ref, AlignmentContext context);
public void compute(VariantContextWindow contextWindow);
public double[] getLikelihoods();

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.utils.*;
@ -255,10 +255,13 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
public boolean useZeroQualityReads() { return false; }
public void compute(char ref, AlignmentContext context, rodVariants variant) {
public void compute(VariantContextWindow contextWindow) {
boolean exclude = false;
VariantContext context = contextWindow.getContext();
rodVariants variant = context.getVariant();
char ref = context.getReferenceContext().getBase();
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
if ( applyToVariant(variant) ) {
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
GenotypeFeatureData gfd = dataTable.get(orderedBases(variant.getBestGenotype()));

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pair;
@ -52,8 +52,12 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
return new Pair(refCount, altCount);
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
char ref = context.getReferenceContext().getBase();
rodVariants variant = context.getVariant();
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
int n = counts.first + counts.second;
ratio = counts.first.doubleValue() / (double)n;

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
/**
* Created by IntelliJ IDEA.
@ -22,9 +22,10 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
}
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
exclude = context.getReads().size() > maximum;
depth = context.getReads().size();
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
depth = context.getAlignmentContext().getReads().size();
exclude = depth > maximum;
}
public double inclusionProbability() {

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord;
@ -22,12 +23,14 @@ public class VECFisherStrand implements VariantExclusionCriterion {
factorials.add(0.0); // add fact(0)
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
rodVariants variant = context.getVariant();
int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0));
int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
if (allele1 != allele2) {
exclude = strandTest(ref, context, allele1, allele2, pvalueLimit, null);
exclude = strandTest(context.getReferenceContext().getBase(), context.getAlignmentContext(), allele1, allele2, pvalueLimit, null);
} else {
exclude = false;
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
@ -54,22 +54,23 @@ public class VECHomopolymer implements VariantExclusionCriterion {
return "";
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
char[] geno = variant.getBestGenotype().toCharArray();
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
AlignmentContext alignmentContext = context.getAlignmentContext();
char[] geno = context.getVariant().getBestGenotype().toCharArray();
int[] genotype = {-1,-1};
for (int h = 0; h < 2; h ++) {
genotype[h] = BaseUtils.simpleBaseToBaseIndex(geno[h]);
}
if (contig == null || !context.getContig().equals(contig)) {
contig = context.getContig();
if (contig == null || !alignmentContext.getContig().equals(contig)) {
contig = alignmentContext.getContig();
contigSeq = refSeq.getSequence(contig);
}
String bases = new String(contigSeq.getBases());
String prevbases = bases.substring((int) (context.getPosition() - extent - 1), (int) (context.getPosition() - 1));
String nextbases = bases.substring((int) (context.getPosition() + 1), (int) (context.getPosition() + extent + 1));
String prevbases = bases.substring((int) (alignmentContext.getPosition() - extent - 1), (int) (alignmentContext.getPosition() - 1));
String nextbases = bases.substring((int) (alignmentContext.getPosition() + 1), (int) (alignmentContext.getPosition() + extent + 1));
int[] prevCounts = {0,0,0,0};
int[] nextCounts = {0,0,0,0};
@ -103,6 +104,7 @@ public class VECHomopolymer implements VariantExclusionCriterion {
if ((float)(prevMax)/(float)(extent) > frac) {homopolymer[0] = prevMaxBase;}
if ((float)(nextMax)/(float)(extent) > frac) {homopolymer[1] = nextMaxBase;}
char ref = context.getReferenceContext().getBase();
boolean checkOne = homopolymer[0] == genotype[0] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
boolean checkTwo = homopolymer[0] == genotype[1] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
boolean checkThree = homopolymer[1] == genotype[0] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref);

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
public class VECLodThreshold implements VariantExclusionCriterion {
private double lodThreshold = 5.0;
@ -14,8 +13,9 @@ public class VECLodThreshold implements VariantExclusionCriterion {
}
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
lod = variant.getLodBtr();
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
lod = context.getVariant().getLodBtr();
exclude = lod < lodThreshold;
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.MathUtils;
import net.sf.samtools.SAMRecord;
@ -19,8 +18,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
}
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
List<SAMRecord> reads = context.getReads();
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
List<SAMRecord> reads = context.getAlignmentContext().getReads();
int[] qualities = new int[reads.size()];
for (int i=0; i < reads.size(); i++)
qualities[i] = reads.get(i).getMappingQuality();

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import net.sf.samtools.SAMRecord;
import java.util.List;
@ -19,8 +17,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
}
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
List<SAMRecord> reads = context.getReads();
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
mq0Count = 0;
for (int i=0; i < reads.size(); i++) {
if ( reads.get(i).getMappingQuality() == 0 )

View File

@ -1,13 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
public class VECNull implements VariantExclusionCriterion {
public void initialize(String arguments) {
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
public void compute(VariantContextWindow contextWindow) {
}
public double inclusionProbability() {

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.*;
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
@ -46,9 +46,12 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
return new Pair<Integer, Integer>(on, off);
}
public void compute(char ref, AlignmentContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
public void compute(VariantContextWindow contextWindow) {
VariantContext context = contextWindow.getContext();
char ref = context.getReferenceContext().getBase();
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext());
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, context.getVariant());
int n = counts.first + counts.second;
ratio = counts.first.doubleValue() / (double)n;
exclude = ratio < threshold;

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.VariantContext;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
/**
* A window of variants surrounding the current variant being investigated
*
* @author ebanks
* @version 0.1
*/
public class VariantContextWindow {
/**
* The variants.
*/
private LinkedList<VariantContext> window = new LinkedList<VariantContext>();
private int currentContext;
/**
* Contructor for a variant context.
* @param firstVariants the first set of variants, comprising the right half of the window
*/
public VariantContextWindow(List<VariantContext> firstVariants) {
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
currentContext = (firstVariants == null ? 0 : firstVariants.size());
window.addAll(firstVariants);
while ( window.size() < windowSize )
window.addFirst(null);
}
/**
* The context currently being examined.
* @return The current context.
*/
public VariantContext getContext() {
return window.get(currentContext);
}
/**
* The maximum number of elements that can be requested on either end of the current context.
* @return max.
*/
public int maxWindowElements() {
return currentContext;
}
/**
* The window around the context currently being examined.
* @param elementsToLeft number of earlier contexts to return ()
* @param elementsToLeft number of later contexts to return ()
* @return The current context window.
*/
public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) {
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
throw new StingException("Too large a window requested");
VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1];
ListIterator<VariantContext> iter = window.listIterator(currentContext - elementsToLeft);
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
array[i] = iter.next();
return array;
}
/**
* Move the window along to the next context
* @param context The new rightmost context
*/
public void moveWindow(VariantContext context) {
window.removeFirst();
window.addLast(context);
}
}

View File

@ -1,12 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.rodVariants;
public interface VariantExclusionCriterion {
public void initialize(String arguments);
public void compute(char ref, AlignmentContext context, rodVariants variant);
public void compute(VariantContextWindow contextWindow);
//public boolean isExcludable();
public double inclusionProbability();

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
@ -12,7 +11,6 @@ import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
* VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants.
@ -38,6 +36,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
private ArrayList<IndependentVariantFeature> requestedFeatures;
private ArrayList<VariantExclusionCriterion> requestedExclusions;
// the structures necessary to initialize and maintain a windowed context
private VariantContextWindow variantContextWindow;
private static final int windowSize = 10; // 10 variants on either end of the current one
private ArrayList<VariantContext> windowInitializer = new ArrayList<VariantContext>();
/**
* Prepare the output file and the list of available features.
*/
@ -174,107 +177,103 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
*/
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
rodVariants variant = (rodVariants) tracker.lookup("variant", null);
// Ignore places where we don't have a variant or where the reference base is ambiguous.
if (variant != null && BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 )
return 0;
if (VERBOSE) { out.println("Original:\n" + variant); }
VariantContext varContext = new VariantContext(tracker, ref, context, variant);
paramsWriter.print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
// Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) {
ivf.compute(ref.getBase(), context);
double[] weights = ivf.getLikelihoods();
variant.adjustLikelihoods(weights);
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
paramsWriter.print(ivf.getStudyInfo() + "\t");
// if we're still initializing the context, do so
if ( windowInitializer != null ) {
windowInitializer.add(varContext);
if ( windowInitializer.size() == windowSize ) {
variantContextWindow = new VariantContextWindow(windowInitializer);
windowInitializer = null;
}
// We need to provide an alternative context without mapping quality 0 reads
// for those exclusion criterion that don't want them
AlignmentContext Q0freeContext = removeQ0reads(context);
// Apply exclusion tests that score the variant call
if (VERBOSE) {
out.print("InclusionProbabilities:[");
}
// Use the filters to score the variant
double jointInclusionProbability = 1.0;
for ( VariantExclusionCriterion vec : requestedExclusions ) {
vec.compute(ref.getBase(), (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
String exclusionClassName = rationalizeClassName(vec.getClass());
Double inclusionProbability = vec.inclusionProbability();
jointInclusionProbability *= inclusionProbability;
exclusionResults.put(exclusionClassName, inclusionProbability);
if (inclusionProbability < INCLUSION_THRESHOLD) {
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
if (ewriter != null) {
ewriter.println(variant);
ewriter.flush();
}
}
if (VERBOSE) {
out.print(exclusionClassName + "=" + inclusionProbability + ";");
}
paramsWriter.print(vec.getStudyInfo() + "\t");
}
// Decide whether we should keep the call or not
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
variantsWriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
} else {
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
}
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbSNP", null);
if ( dbsnp == null ) {
paramsWriter.print("false\tfalse\t");
} else {
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
}
paramsWriter.println(GenotypeUtils.isHet(variant));
return 1;
} else {
variantContextWindow.moveWindow(varContext);
compute();
}
return 0;
return 1;
}
private AlignmentContext removeQ0reads(AlignmentContext context) {
// set up the variables
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
Iterator<SAMRecord> readIter = reads.iterator();
Iterator<Integer> offsetIter = offsets.iterator();
ArrayList<SAMRecord> Q0freeReads = new ArrayList<SAMRecord>();
ArrayList<Integer> Q0freeOffsets = new ArrayList<Integer>();
private void compute() {
// get the current context
VariantContext context = variantContextWindow.getContext();
if ( context == null )
return;
rodVariants variant = context.getVariant();
// copy over good reads/offsets
while ( readIter.hasNext() ) {
SAMRecord read = readIter.next();
Integer offset = offsetIter.next();
if ( read.getMappingQuality() > 0 ) {
Q0freeReads.add(read);
Q0freeOffsets.add(offset);
}
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
if (VERBOSE) { out.println("Original:\n" + variant); }
GenomeLoc loc = context.getAlignmentContext().getLocation();
paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
// Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) {
ivf.compute(variantContextWindow);
double[] weights = ivf.getLikelihoods();
variant.adjustLikelihoods(weights);
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
paramsWriter.print(ivf.getStudyInfo() + "\t");
}
return new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets);
// Apply exclusion tests that score the variant call
if (VERBOSE) {
out.print("InclusionProbabilities:[");
}
// Use the filters to score the variant
double jointInclusionProbability = 1.0;
for ( VariantExclusionCriterion vec : requestedExclusions ) {
vec.compute(variantContextWindow);
String exclusionClassName = rationalizeClassName(vec.getClass());
Double inclusionProbability = vec.inclusionProbability();
jointInclusionProbability *= inclusionProbability;
exclusionResults.put(exclusionClassName, inclusionProbability);
if (inclusionProbability < INCLUSION_THRESHOLD) {
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
if (ewriter != null) {
ewriter.println(variant);
ewriter.flush();
}
}
if (VERBOSE) {
out.print(exclusionClassName + "=" + inclusionProbability + ";");
}
paramsWriter.print(vec.getStudyInfo() + "\t");
}
// Decide whether we should keep the call or not
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
variantsWriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
} else {
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
}
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
if ( dbsnp == null ) {
paramsWriter.print("false\tfalse\t");
} else {
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
}
paramsWriter.println(GenotypeUtils.isHet(variant));
}
/**
@ -294,6 +293,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
for (int i=0; i < windowSize; i++) {
variantContextWindow.moveWindow(null);
compute();
}
out.printf("Processed %d loci.\n", result);
variantsWriter.close();