New VariantFiltration.
Wiki docs are updated. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2105 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a78bc60c0f
commit
797bb83209
4
ivy.xml
4
ivy.xml
|
|
@ -23,6 +23,10 @@
|
||||||
<!-- Dependencies for the graph aligner -->
|
<!-- Dependencies for the graph aligner -->
|
||||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3" conf="default"/>
|
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3" conf="default"/>
|
||||||
|
|
||||||
|
<!-- Dependencies for VariantFiltration -->
|
||||||
|
<dependency org="commons-jexl" name="commons-jexl" rev="1.1" conf="default"/>
|
||||||
|
<dependency org="commons-logging" name="commons-logging" rev="1.1.1" conf="default"/>
|
||||||
|
|
||||||
<!-- Scala dependancies -->
|
<!-- Scala dependancies -->
|
||||||
<dependency org="org.scala-lang" name="scala-compiler" rev="2.7.7" conf="scala->default"/>
|
<dependency org="org.scala-lang" name="scala-compiler" rev="2.7.7" conf="scala->default"/>
|
||||||
<dependency org="org.scala-lang" name="scala-library" rev="2.7.7" conf="scala->default"/>
|
<dependency org="org.scala-lang" name="scala-library" rev="2.7.7" conf="scala->default"/>
|
||||||
|
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.contexts;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
|
||||||
public class VariantContext {
|
|
||||||
private RefMetaDataTracker tracker;
|
|
||||||
private ReferenceContext ref;
|
|
||||||
private AlignmentContext context;
|
|
||||||
private RodGeliText variant;
|
|
||||||
private RodVCF variantVCF;
|
|
||||||
private AlignmentContext Q0freeContext = null;
|
|
||||||
|
|
||||||
public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref,
|
|
||||||
AlignmentContext context, RodGeliText variant, RodVCF variantVCF) {
|
|
||||||
this.tracker = tracker;
|
|
||||||
this.ref = ref;
|
|
||||||
this.context = context;
|
|
||||||
this.variant = variant;
|
|
||||||
this.variantVCF = variantVCF;
|
|
||||||
}
|
|
||||||
|
|
||||||
public RefMetaDataTracker getTracker() { return tracker; }
|
|
||||||
public ReferenceContext getReferenceContext() { return ref; }
|
|
||||||
public RodGeliText getVariant() { return variant; }
|
|
||||||
public RodVCF getVariantVCF() { return variantVCF; }
|
|
||||||
public AlignmentContext getAlignmentContext(boolean useMQ0Reads) {
|
|
||||||
return (useMQ0Reads ? context : getQ0freeContext());
|
|
||||||
}
|
|
||||||
|
|
||||||
private AlignmentContext getQ0freeContext() {
|
|
||||||
if ( Q0freeContext == null ) {
|
|
||||||
// set up the variables
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
Iterator<SAMRecord> readIter = reads.iterator();
|
|
||||||
Iterator<Integer> offsetIter = offsets.iterator();
|
|
||||||
ArrayList<SAMRecord> Q0freeReads = new ArrayList<SAMRecord>();
|
|
||||||
ArrayList<Integer> Q0freeOffsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
// copy over good reads/offsets
|
|
||||||
while ( readIter.hasNext() ) {
|
|
||||||
SAMRecord read = readIter.next();
|
|
||||||
Integer offset = offsetIter.next();
|
|
||||||
if ( read.getMappingQuality() > 0 ) {
|
|
||||||
Q0freeReads.add(read);
|
|
||||||
Q0freeOffsets.add(offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Q0freeContext = new AlignmentContext(context.getLocation(), Q0freeReads, Q0freeOffsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
return Q0freeContext;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
|
||||||
|
|
||||||
|
public class ClusteredSnps {
|
||||||
|
private int window = 10;
|
||||||
|
private int snpThreshold = 3;
|
||||||
|
|
||||||
|
public ClusteredSnps(int snpThreshold, int window) {
|
||||||
|
this.window = window;
|
||||||
|
this.snpThreshold = snpThreshold;
|
||||||
|
if ( window < 1 || snpThreshold < 1 )
|
||||||
|
throw new IllegalArgumentException("Window and threshold values need to be positive values");
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean filter(VariantContextWindow contextWindow) {
|
||||||
|
|
||||||
|
Pair<RefMetaDataTracker, RodVCF>[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
|
||||||
|
for (int i = 0; i < snpThreshold; i++) {
|
||||||
|
if ( variants[i] == null || variants[i+snpThreshold-1] == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
GenomeLoc left = variants[i].second.getLocation();
|
||||||
|
GenomeLoc right = variants[i+snpThreshold-1].second.getLocation();
|
||||||
|
if ( left.getContigIndex() == right.getContigIndex() &&
|
||||||
|
Math.abs(right.getStart() - left.getStart()) <= window )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class IVFBinomialStrand implements IndependentVariantFeature {
|
|
||||||
private double strandBalance = 0.5;
|
|
||||||
|
|
||||||
private double[] likelihoods;
|
|
||||||
|
|
||||||
public void initialize(String arguments) {
|
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
|
||||||
strandBalance = Double.valueOf(arguments);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
likelihoods = new double[10];
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
|
|
||||||
String bases = pileup.getBasesAsString();
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
|
||||||
Genotype genotype = Genotype.values()[genotypeIndex];
|
|
||||||
|
|
||||||
char[] alleles = { genotype.toString().charAt(0), genotype.toString().charAt(1) };
|
|
||||||
int[] strandCounts = { 0, 0 };
|
|
||||||
|
|
||||||
for (int pileupIndex = 0; pileupIndex < bases.length(); pileupIndex++) {
|
|
||||||
for (int alleleIndex = 0; alleleIndex < alleles.length; alleleIndex++) {
|
|
||||||
if (bases.charAt(pileupIndex) == alleles[alleleIndex]) {
|
|
||||||
if (reads.get(pileupIndex).getReadNegativeStrandFlag()) {
|
|
||||||
strandCounts[1]++;
|
|
||||||
} else {
|
|
||||||
strandCounts[0]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
likelihoods[genotypeIndex] = Math.log10(MathUtils.binomialProbability(strandCounts[0], strandCounts[0] + strandCounts[1], strandBalance));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return likelihoods;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
public class IVFNull implements IndependentVariantFeature {
|
|
||||||
public void initialize(String arguments) {}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return new double[10];
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,82 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
|
|
||||||
public class IVFPrimaryBases implements IndependentVariantFeature {
|
|
||||||
private double[] p = { 0.933, 0.972, 0.970, 0.960, 0.945, 0.990, 0.971, 0.943, 0.978, 0.928 };
|
|
||||||
|
|
||||||
private double[] likelihoods;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method so that features can initialize themselves based on a short argument string. At the moment, each feature is
|
|
||||||
* responsible for interpreting their own argument string.
|
|
||||||
*
|
|
||||||
* @param arguments the arguments!
|
|
||||||
*/
|
|
||||||
public void initialize(String arguments) {
|
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
|
||||||
String[] argPieces = arguments.split(",");
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
|
||||||
p[genotypeIndex] = Double.valueOf(argPieces[genotypeIndex]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
|
|
||||||
* of length 10 (one for each genotype) and the value must be in log10-space.
|
|
||||||
*
|
|
||||||
* @param contextWindow the context for the given locus
|
|
||||||
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
|
||||||
*/
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
likelihoods = new double[10];
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
String primaryBases = pileup.getBasesAsString();
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
|
||||||
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);
|
|
||||||
char secondAllele = Genotype.values()[genotypeIndex].toString().charAt(1);
|
|
||||||
|
|
||||||
int offTotal = 0;
|
|
||||||
|
|
||||||
int onTotal = 0;
|
|
||||||
|
|
||||||
for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
|
|
||||||
char primaryBase = primaryBases.charAt(pileupIndex);
|
|
||||||
|
|
||||||
if (primaryBase != firstAllele && primaryBase != secondAllele) {
|
|
||||||
offTotal++;
|
|
||||||
} else {
|
|
||||||
onTotal++;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
int Total = onTotal + offTotal;
|
|
||||||
|
|
||||||
double Prior = MathUtils.binomialProbability(offTotal, Total, p[genotypeIndex]);
|
|
||||||
|
|
||||||
likelihoods[genotypeIndex] = Math.log10(Prior);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return likelihoods;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,129 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
|
|
||||||
public class IVFSecondaryBases implements IndependentVariantFeature {
|
|
||||||
private double[] p2on = { 0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000 };
|
|
||||||
private double[] p2off = { 0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505 };
|
|
||||||
|
|
||||||
private double[] likelihoods;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method so that features can initialize themselves based on a short argument string. At the moment, each feature is
|
|
||||||
* responsible for interpreting their own argument string.
|
|
||||||
*
|
|
||||||
* @param arguments the arguments!
|
|
||||||
*/
|
|
||||||
public void initialize(String arguments) {
|
|
||||||
if (arguments != null && !arguments.isEmpty()) {
|
|
||||||
String[] argPieces = arguments.split(";");
|
|
||||||
String[] argOnPieces = argPieces[0].split(",");
|
|
||||||
String[] argOffPieces = argPieces[0].split(",");
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
|
||||||
p2on[genotypeIndex] = Double.valueOf(argOnPieces[genotypeIndex]);
|
|
||||||
p2off[genotypeIndex] = Double.valueOf(argOffPieces[genotypeIndex]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private double scaleObservation(int k, int N, double p) {
|
|
||||||
int i = 0;
|
|
||||||
double likelihood = 0.0, logLikelihood = 0.0, scale = 1.0;
|
|
||||||
|
|
||||||
do {
|
|
||||||
scale = 1.0 - ((double) i)*0.10;
|
|
||||||
|
|
||||||
int scaledK = (int) (scale * ((double) k));
|
|
||||||
int scaledN = (int) (scale * ((double) N));
|
|
||||||
|
|
||||||
likelihood = MathUtils.binomialProbability(scaledK, scaledN, p);
|
|
||||||
logLikelihood = Math.log10(likelihood);
|
|
||||||
|
|
||||||
//System.out.printf(" %d %f: %d->%d, %d->%d, %f => %f %f\n", i, scale, k, scaledK, N, scaledN, p, likelihood, logLikelihood);
|
|
||||||
|
|
||||||
i++;
|
|
||||||
} while (i < 10 && (Double.isInfinite(logLikelihood) || Double.isNaN(logLikelihood)));
|
|
||||||
|
|
||||||
//System.out.println();
|
|
||||||
|
|
||||||
return likelihood;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method to compute the result of this feature for each of the ten genotypes. The return value must be a double array
|
|
||||||
* of length 10 (one for each genotype) and the value must be in log10-space.
|
|
||||||
*
|
|
||||||
* @param contextWindow the context for the given locus
|
|
||||||
* @return a ten-element array of log-likelihood result of the feature applied to each genotype
|
|
||||||
*/
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
likelihoods = new double[10];
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
String primaryBases = pileup.getBasesAsString();
|
|
||||||
String secondaryBases = pileup.getSecondaryBasePileup();
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < Genotype.values().length; genotypeIndex++) {
|
|
||||||
char firstAllele = Genotype.values()[genotypeIndex].toString().charAt(0);
|
|
||||||
char secondAllele = Genotype.values()[genotypeIndex].toString().charAt(1);
|
|
||||||
|
|
||||||
int offIsGenotypic = 0;
|
|
||||||
int offTotal = 0;
|
|
||||||
|
|
||||||
int onIsGenotypic = 0;
|
|
||||||
int onTotal = 0;
|
|
||||||
|
|
||||||
for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
|
|
||||||
char primaryBase = primaryBases.charAt(pileupIndex);
|
|
||||||
|
|
||||||
if (secondaryBases != null) {
|
|
||||||
char secondaryBase = secondaryBases.charAt(pileupIndex);
|
|
||||||
|
|
||||||
if (primaryBase != firstAllele && primaryBase != secondAllele) {
|
|
||||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
|
||||||
offIsGenotypic++;
|
|
||||||
}
|
|
||||||
offTotal++;
|
|
||||||
} else {
|
|
||||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
|
||||||
onIsGenotypic++;
|
|
||||||
}
|
|
||||||
onTotal++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//System.out.printf(Genotype.values()[genotypeIndex].toString() + " on:\n");
|
|
||||||
double offPrior = scaleObservation(offIsGenotypic, offTotal, p2off[genotypeIndex]);
|
|
||||||
|
|
||||||
//System.out.printf(Genotype.values()[genotypeIndex].toString() + " off:\n");
|
|
||||||
double onPrior = scaleObservation(onIsGenotypic, onTotal, p2on[genotypeIndex]);
|
|
||||||
|
|
||||||
//System.out.printf("%s: off: %d/%d %f %f%n", Genotype.values()[genotypeIndex].toString(), offIsGenotypic, offTotal, p2off[genotypeIndex], offPrior);
|
|
||||||
//System.out.printf("%s: on: %d/%d %f %f%n", Genotype.values()[genotypeIndex].toString(), onIsGenotypic, onTotal, p2on[genotypeIndex], onPrior);
|
|
||||||
|
|
||||||
double logOffPrior = MathUtils.compareDoubles(offPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(offPrior);
|
|
||||||
double logOnPrior = MathUtils.compareDoubles(onPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(onPrior);
|
|
||||||
|
|
||||||
likelihoods[genotypeIndex] = logOffPrior + logOnPrior;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] getLikelihoods() {
|
|
||||||
return likelihoods;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Interface for conditionally independent variant features.
|
|
||||||
*/
|
|
||||||
public interface IndependentVariantFeature {
|
|
||||||
/**
|
|
||||||
* A convenient enumeration for each of the ten genotypes.
|
|
||||||
*/
|
|
||||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method so that features can initialize themselves based on a short argument string.
|
|
||||||
* At the moment, each feature is responsible for interpreting their own argument string.
|
|
||||||
*
|
|
||||||
* @param arguments the arguments!
|
|
||||||
*/
|
|
||||||
public void initialize(String arguments);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method to compute the result of this feature for each of the ten genotypes. The return value must
|
|
||||||
* be a double array of length 10 (one for each genotype) and the value must be in log10-space.
|
|
||||||
*
|
|
||||||
* @param contextWindow the context for the given locus
|
|
||||||
*/
|
|
||||||
public void compute(VariantContextWindow contextWindow);
|
|
||||||
|
|
||||||
public double[] getLikelihoods();
|
|
||||||
|
|
||||||
public String getStudyHeader();
|
|
||||||
|
|
||||||
public String getStudyInfo();
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads();
|
|
||||||
}
|
|
||||||
|
|
@ -1,119 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
|
|
||||||
|
|
||||||
public abstract class RatioFilter implements VariantExclusionCriterion {
|
|
||||||
private static final double defaultMinGenotypeConfidenceToTest = 5.0; // TODO -- must be replaced with true confidence scoore, right now assumes LOD
|
|
||||||
private static final int minDepthOfCoverage = 25; // TODO -- must be replaced with a proper probability calculation
|
|
||||||
protected double minGenotypeConfidenceToTest = defaultMinGenotypeConfidenceToTest;
|
|
||||||
|
|
||||||
protected double pvalueLimit = -1;
|
|
||||||
protected Logger logger = null; // Logger.getLogger(RatioFilter.class);
|
|
||||||
protected String name = null;
|
|
||||||
|
|
||||||
protected enum Tail { LeftTailed, RightTailed, TwoTailed }
|
|
||||||
protected Tail tail = null;
|
|
||||||
protected double lowThreshold = -1;
|
|
||||||
protected double highThreshold = -1;
|
|
||||||
|
|
||||||
protected enum AnalysisType { POINT_ESTIMATE, FAIR_COIN_TEST };
|
|
||||||
protected AnalysisType analysis = AnalysisType.POINT_ESTIMATE;
|
|
||||||
protected double integralPValueThreshold = 0.05;
|
|
||||||
private final static double SEARCH_INCREMENT = 0.01;
|
|
||||||
|
|
||||||
protected boolean exclude = false;
|
|
||||||
|
|
||||||
public RatioFilter(final String name, Class myClass, Tail tail ) {
|
|
||||||
this.name = name;
|
|
||||||
this.tail = tail;
|
|
||||||
logger = Logger.getLogger(myClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setLowThreshold(double threshold) {
|
|
||||||
lowThreshold = threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setHighThreshold(double threshold) {
|
|
||||||
highThreshold = threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setIntegralPvalue(double pvalue) {
|
|
||||||
integralPValueThreshold = pvalue;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract Pair<Integer, Integer> getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant);
|
|
||||||
protected abstract boolean excludeHetsOnly();
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
RodGeliText variant = context.getVariant();
|
|
||||||
char ref = context.getReferenceContext().getBase();
|
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getAlignmentContext(useZeroQualityReads()));
|
|
||||||
Pair<Integer, Integer> counts = getRatioCounts(ref, pileup, variant);
|
|
||||||
|
|
||||||
boolean highGenotypeConfidence = variant.getConsensusConfidence() > minGenotypeConfidenceToTest;
|
|
||||||
boolean excludable = !excludeHetsOnly() || GenotypeUtils.isHet(variant);
|
|
||||||
|
|
||||||
if ( analysis == AnalysisType.POINT_ESTIMATE )
|
|
||||||
exclude = excludable && highGenotypeConfidence && pointEstimateExclude(counts);
|
|
||||||
else if ( analysis == AnalysisType.FAIR_COIN_TEST )
|
|
||||||
exclude = excludable && highGenotypeConfidence && integralExclude(counts);
|
|
||||||
|
|
||||||
//
|
|
||||||
// for printing only
|
|
||||||
//
|
|
||||||
int n = counts.first + counts.second;
|
|
||||||
double value = counts.first / (1.0 * counts.first + counts.second);
|
|
||||||
//logger.info(String.format("%s: counts1=%d (%.2f), counts2=%d (%.2f), n=%d, value=%f, exclude=%b, location=%s, bases=%s",
|
|
||||||
// name, counts.first, counts.first / (0.01 * n), counts.second, counts.second / (0.01 * n), n,
|
|
||||||
// value, exclude, variant.getLocation(), pileup.getBases()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO - this calculation needs to be parameterized correctly
|
|
||||||
private boolean pointEstimateExclude(Pair<Integer, Integer> counts) {
|
|
||||||
int n = counts.first + counts.second;
|
|
||||||
if ( n < minDepthOfCoverage )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
double ratio = counts.first.doubleValue() / (double)n;
|
|
||||||
return !passesThreshold(ratio);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean integralExclude(Pair<Integer, Integer> counts) {
|
|
||||||
double sumExclude = 0.0, sumP = 0.0;
|
|
||||||
int n = counts.first + counts.second;
|
|
||||||
for ( double r = 0.0; r <= 1.0; r += SEARCH_INCREMENT ) {
|
|
||||||
double p = MathUtils.binomialProbability(counts.first, n, r);
|
|
||||||
sumP += p;
|
|
||||||
boolean exclude = ! passesThreshold(r);
|
|
||||||
sumExclude += p * (exclude ? 1.0 : 0.0);
|
|
||||||
|
|
||||||
//System.out.printf("integral: k=%d, n=%d, r=%f, p=%f, sumP = %f, exclude=%b | sum=%f, percentExcluded=%f%n",
|
|
||||||
// counts.first, n, r, p, sumP, exclude, sumExclude, sumExclude / sumP);
|
|
||||||
}
|
|
||||||
|
|
||||||
double percentExcluded = sumExclude / sumP;
|
|
||||||
|
|
||||||
return 1 - percentExcluded <= integralPValueThreshold ;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean passesThreshold(double value) {
|
|
||||||
switch ( tail ) {
|
|
||||||
case LeftTailed:
|
|
||||||
return value >= lowThreshold;
|
|
||||||
case RightTailed:
|
|
||||||
return value <= highThreshold;
|
|
||||||
case TwoTailed:
|
|
||||||
return value >= lowThreshold && value < highThreshold;
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class VECAlleleBalance extends RatioFilter {
|
|
||||||
|
|
||||||
private double lowThreshold = 0.25, highThreshold = 0.75;
|
|
||||||
private double ratio;
|
|
||||||
|
|
||||||
public VECAlleleBalance() {
|
|
||||||
super("Allele Balance Ratio", VECAlleleBalance.class, Tail.TwoTailed);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("analysis") != null ) {
|
|
||||||
if ( args.get("analysis").equalsIgnoreCase("fair_coin_test") )
|
|
||||||
analysis = AnalysisType.FAIR_COIN_TEST;
|
|
||||||
else if ( args.get("analysis").equalsIgnoreCase("point_estimate") )
|
|
||||||
analysis = AnalysisType.POINT_ESTIMATE;
|
|
||||||
}
|
|
||||||
if ( args.get("pvalue") != null )
|
|
||||||
setIntegralPvalue(Double.valueOf(args.get("pvalue")));
|
|
||||||
if ( args.get("low") != null )
|
|
||||||
lowThreshold = Double.valueOf(args.get("low"));
|
|
||||||
if ( args.get("high") != null )
|
|
||||||
highThreshold = Double.valueOf(args.get("high"));
|
|
||||||
if ( args.get("confidence") != null )
|
|
||||||
minGenotypeConfidenceToTest = Double.valueOf(args.get("confidence"));
|
|
||||||
setLowThreshold(lowThreshold);
|
|
||||||
setHighThreshold(highThreshold);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the count of bases matching the major (first) and minor (second) alleles as a pair.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
protected Pair<Integer, Integer> getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant) {
|
|
||||||
final String genotype = variant.getBestGenotype();
|
|
||||||
if ( genotype.length() > 2 )
|
|
||||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
|
||||||
|
|
||||||
final String bases = pileup.getBasesAsString();
|
|
||||||
if ( bases.length() == 0 ) {
|
|
||||||
ratio = 0.0;
|
|
||||||
return new Pair<Integer, Integer>(0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
char a = genotype.toUpperCase().charAt(0);
|
|
||||||
char b = genotype.toUpperCase().charAt(1);
|
|
||||||
int aCount = Utils.countOccurrences(a, bases.toUpperCase());
|
|
||||||
int bCount = Utils.countOccurrences(b, bases.toUpperCase());
|
|
||||||
|
|
||||||
int refCount = a == ref ? aCount : bCount;
|
|
||||||
int altCount = a == ref ? bCount : aCount;
|
|
||||||
|
|
||||||
ratio = (double)refCount / (double)(refCount + altCount);
|
|
||||||
return new Pair<Integer, Integer>(refCount, altCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected boolean excludeHetsOnly() { return true; }
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tRefRatio";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "AlleleBalance";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%.2f", ratio);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class VECClusteredSnps implements VariantExclusionCriterion {
|
|
||||||
private int window = 10;
|
|
||||||
private int snpThreshold = 3;
|
|
||||||
private boolean exclude;
|
|
||||||
private long distance;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("window") != null )
|
|
||||||
window = Integer.valueOf(args.get("window"));
|
|
||||||
if ( args.get("snps") != null )
|
|
||||||
snpThreshold = Integer.valueOf(args.get("snps"));
|
|
||||||
|
|
||||||
if ( window < 2 || snpThreshold < 2 )
|
|
||||||
throw new StingException("Window and threshold values need to be >= 2");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
exclude = false;
|
|
||||||
|
|
||||||
VariantContext[] variants = contextWindow.getWindow(snpThreshold-1, snpThreshold-1);
|
|
||||||
for (int i = 0; i < snpThreshold; i++) {
|
|
||||||
if ( variants[i] == null || variants[i+snpThreshold-1] == null )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
GenomeLoc left = variants[i].getAlignmentContext(useZeroQualityReads()).getLocation();
|
|
||||||
GenomeLoc right = variants[i+snpThreshold-1].getAlignmentContext(useZeroQualityReads()).getLocation();
|
|
||||||
if ( left.getContigIndex() == right.getContigIndex() ) {
|
|
||||||
distance = Math.abs(right.getStart() - left.getStart());
|
|
||||||
if ( distance <= window ) {
|
|
||||||
exclude = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "ClusteredSnps("+window+","+snpThreshold+")\tWindow_size";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "ClusteredSnp";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%d", distance);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: michaelmelgar
|
|
||||||
* Date: Jun 22, 2009
|
|
||||||
* Time: 6:04:58 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
|
||||||
private int maximum = 200;
|
|
||||||
|
|
||||||
private boolean exclude = false;
|
|
||||||
private int depth;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("max") != null )
|
|
||||||
maximum = Integer.valueOf(args.get("max"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
depth = context.getAlignmentContext(useZeroQualityReads()).getReads().size();
|
|
||||||
exclude = depth > maximum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// public boolean isExcludable() {
|
|
||||||
// return exclude;
|
|
||||||
// }
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "DepthOfCoverage("+maximum+")\tdepth";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + depth;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "DoC";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%d", depth);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,210 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import cern.jet.math.Arithmetic;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class VECFisherStrand implements VariantExclusionCriterion {
|
|
||||||
private double pvalueLimit = 0.00001;
|
|
||||||
private double pValue;
|
|
||||||
private boolean exclude;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("pvalue") != null )
|
|
||||||
pvalueLimit = Double.valueOf(args.get("pvalue"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
RodGeliText variant = context.getVariant();
|
|
||||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(0));
|
|
||||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(variant.getBestGenotype().charAt(1));
|
|
||||||
|
|
||||||
if (allele1 != allele2) {
|
|
||||||
exclude = strandTest(context.getReferenceContext().getBase(), context.getAlignmentContext(useZeroQualityReads()), allele1, allele2, pvalueLimit, null);
|
|
||||||
} else {
|
|
||||||
exclude = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "FisherStrand("+pvalueLimit+")\tpvalue";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "strand";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%.4f",pValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
|
||||||
int[][] table = getContingencyTable(context, allele1, allele2);
|
|
||||||
if ( !variantIsHet(table) )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
double pCutoff = computePValue(table);
|
|
||||||
//printTable(table, pCutoff);
|
|
||||||
|
|
||||||
pValue = pCutoff;
|
|
||||||
while (rotateTable(table)) {
|
|
||||||
double pValuePiece = computePValue(table);
|
|
||||||
|
|
||||||
//printTable(table, pValuePiece);
|
|
||||||
|
|
||||||
if (pValuePiece <= pCutoff) {
|
|
||||||
pValue += pValuePiece;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
table = getContingencyTable(context, allele1, allele2);
|
|
||||||
|
|
||||||
while (unrotateTable(table)) {
|
|
||||||
double pValuePiece = computePValue(table);
|
|
||||||
|
|
||||||
//printTable(table, pValuePiece);
|
|
||||||
|
|
||||||
if (pValuePiece <= pCutoff) {
|
|
||||||
pValue += pValuePiece;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//System.out.printf("P-cutoff: %f\n", pCutoff);
|
|
||||||
//System.out.printf("P-value: %f\n\n", pValue);
|
|
||||||
|
|
||||||
// optionally print out the pvalue and the alternate allele counts
|
|
||||||
if ( out != null ) {
|
|
||||||
int refBase = BaseUtils.simpleBaseToBaseIndex(ref);
|
|
||||||
table = getContingencyTable(context, allele1, allele2);
|
|
||||||
if ( allele1 != refBase )
|
|
||||||
out.append(pValue + "\t" + table[0][0] + "\t" + table[0][1] + "\t");
|
|
||||||
else
|
|
||||||
out.append(pValue + "\t" + table[1][0] + "\t" + table[1][1] + "\t");
|
|
||||||
}
|
|
||||||
|
|
||||||
return pValue < threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean variantIsHet(int[][] table) {
|
|
||||||
return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void printTable(int[][] table, double pValue) {
|
|
||||||
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean rotateTable(int[][] table) {
|
|
||||||
table[0][0] -= 1;
|
|
||||||
table[1][0] += 1;
|
|
||||||
|
|
||||||
table[0][1] += 1;
|
|
||||||
table[1][1] -= 1;
|
|
||||||
|
|
||||||
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean unrotateTable(int[][] table) {
|
|
||||||
table[0][0] += 1;
|
|
||||||
table[1][0] -= 1;
|
|
||||||
|
|
||||||
table[0][1] -= 1;
|
|
||||||
table[1][1] += 1;
|
|
||||||
|
|
||||||
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double computePValue(int[][] table) {
|
|
||||||
|
|
||||||
int[] rowSums = { sumRow(table, 0), sumRow(table, 1) };
|
|
||||||
int[] colSums = { sumColumn(table, 0), sumColumn(table, 1) };
|
|
||||||
int N = rowSums[0] + rowSums[1];
|
|
||||||
|
|
||||||
// calculate in log space so we don't die with high numbers
|
|
||||||
double pCutoff = Arithmetic.logFactorial(rowSums[0])
|
|
||||||
+ Arithmetic.logFactorial(rowSums[1])
|
|
||||||
+ Arithmetic.logFactorial(colSums[0])
|
|
||||||
+ Arithmetic.logFactorial(colSums[1])
|
|
||||||
- Arithmetic.logFactorial(table[0][0])
|
|
||||||
- Arithmetic.logFactorial(table[0][1])
|
|
||||||
- Arithmetic.logFactorial(table[1][0])
|
|
||||||
- Arithmetic.logFactorial(table[1][1])
|
|
||||||
- Arithmetic.logFactorial(N);
|
|
||||||
return Math.exp(pCutoff);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int sumRow(int[][] table, int column) {
|
|
||||||
int sum = 0;
|
|
||||||
for (int r = 0; r < table.length; r++) {
|
|
||||||
sum += table[r][column];
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int sumColumn(int[][] table, int row) {
|
|
||||||
int sum = 0;
|
|
||||||
for (int c = 0; c < table[row].length; c++) {
|
|
||||||
sum += table[row][c];
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
|
||||||
* fw rc
|
|
||||||
* allele1 # #
|
|
||||||
* allele2 # #
|
|
||||||
* @param context the context for the locus
|
|
||||||
* @param allele1 information for the called variant
|
|
||||||
* @param allele2 information for the called variant
|
|
||||||
* @return a 2x2 contingency table
|
|
||||||
*/
|
|
||||||
private static int[][] getContingencyTable(AlignmentContext context, int allele1, int allele2) {
|
|
||||||
|
|
||||||
int[][] table = new int[2][2];
|
|
||||||
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
|
|
||||||
for (int readIndex = 0; readIndex < reads.size(); readIndex++) {
|
|
||||||
SAMRecord read = reads.get(readIndex);
|
|
||||||
int offset = offsets.get(readIndex);
|
|
||||||
|
|
||||||
// skip over deletion sites
|
|
||||||
if ( offset == -1 )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int readAllele = BaseUtils.simpleBaseToBaseIndex(read.getReadString().charAt(offset));
|
|
||||||
boolean isFW = !read.getReadNegativeStrandFlag();
|
|
||||||
|
|
||||||
if (readAllele == allele1 || readAllele == allele2) {
|
|
||||||
int row = (readAllele == allele1) ? 0 : 1;
|
|
||||||
int column = isFW ? 0 : 1;
|
|
||||||
|
|
||||||
table[row][column]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return table;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,126 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mmelgar
|
|
||||||
* Date: Jul 28, 2009
|
|
||||||
* Time: 3:59:38 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class VECHomopolymer implements VariantExclusionCriterion {
|
|
||||||
|
|
||||||
private int extent = 5;
|
|
||||||
private float frac = 0.8f;
|
|
||||||
public IndexedFastaSequenceFile refSeq;
|
|
||||||
private String contig;
|
|
||||||
public ReferenceSequence contigSeq;
|
|
||||||
|
|
||||||
private boolean exclude = false;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("extent") != null )
|
|
||||||
extent = Integer.valueOf(args.get("extent"));
|
|
||||||
if ( args.get("fraction") != null )
|
|
||||||
frac = Float.valueOf(args.get("fraction"));
|
|
||||||
|
|
||||||
File refFile = new File ("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta");
|
|
||||||
|
|
||||||
try {
|
|
||||||
refSeq = new IndexedFastaSequenceFile(refFile);
|
|
||||||
} catch (IOException e) {
|
|
||||||
refSeq = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
AlignmentContext alignmentContext = context.getAlignmentContext(useZeroQualityReads());
|
|
||||||
char[] geno = context.getVariant().getBestGenotype().toCharArray();
|
|
||||||
int[] genotype = {-1,-1};
|
|
||||||
for (int h = 0; h < 2; h ++) {
|
|
||||||
genotype[h] = BaseUtils.simpleBaseToBaseIndex(geno[h]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (contig == null || !alignmentContext.getContig().equals(contig)) {
|
|
||||||
contig = alignmentContext.getContig();
|
|
||||||
contigSeq = refSeq.getSequence(contig);
|
|
||||||
}
|
|
||||||
|
|
||||||
String bases = new String(contigSeq.getBases());
|
|
||||||
String prevbases = bases.substring((int) (alignmentContext.getPosition() - extent - 1), (int) (alignmentContext.getPosition() - 1));
|
|
||||||
String nextbases = bases.substring((int) (alignmentContext.getPosition() + 1), (int) (alignmentContext.getPosition() + extent + 1));
|
|
||||||
|
|
||||||
int[] prevCounts = {0,0,0,0};
|
|
||||||
int[] nextCounts = {0,0,0,0};
|
|
||||||
for (int i = 0; i < extent; i ++) {
|
|
||||||
int prevBaseIndex = BaseUtils.simpleBaseToBaseIndex(prevbases.toCharArray()[i]);
|
|
||||||
int nextBaseIndex = BaseUtils.simpleBaseToBaseIndex(nextbases.toCharArray()[i]);
|
|
||||||
if (prevBaseIndex != -1) {
|
|
||||||
prevCounts[prevBaseIndex] ++;
|
|
||||||
}
|
|
||||||
if (nextBaseIndex != -1) {
|
|
||||||
nextCounts[nextBaseIndex] ++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int prevMaxBase = 0;
|
|
||||||
int nextMaxBase = 0;
|
|
||||||
int prevMax = prevCounts[0];
|
|
||||||
int nextMax = nextCounts[0];
|
|
||||||
for (int j = 1; j < 4; j ++) {
|
|
||||||
if (prevCounts[j] > prevMax) {
|
|
||||||
prevMax = prevCounts[j];
|
|
||||||
prevMaxBase = j;
|
|
||||||
}
|
|
||||||
if (nextCounts[j] > nextMax) {
|
|
||||||
nextMax = nextCounts[j];
|
|
||||||
nextMaxBase = j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int[] homopolymer = {-1,-1};
|
|
||||||
if ((float)(prevMax)/(float)(extent) > frac) {homopolymer[0] = prevMaxBase;}
|
|
||||||
if ((float)(nextMax)/(float)(extent) > frac) {homopolymer[1] = nextMaxBase;}
|
|
||||||
|
|
||||||
char ref = context.getReferenceContext().getBase();
|
|
||||||
boolean checkOne = homopolymer[0] == genotype[0] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
|
|
||||||
boolean checkTwo = homopolymer[0] == genotype[1] && homopolymer[0] != BaseUtils.simpleBaseToBaseIndex(ref);
|
|
||||||
boolean checkThree = homopolymer[1] == genotype[0] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref);
|
|
||||||
boolean checkFour = homopolymer[1] == genotype[1] && homopolymer[1] != BaseUtils.simpleBaseToBaseIndex(ref);
|
|
||||||
|
|
||||||
exclude = checkOne || checkTwo || checkThree || checkFour;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "homopolymer";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return exclude ? "1" : "0";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.CleanedOutSNPROD;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class VECIndelArtifact implements VariantExclusionCriterion {
|
|
||||||
private boolean exclude;
|
|
||||||
private String source = "N/A";
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> arguments) {}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
RefMetaDataTracker tracker = context.getTracker();
|
|
||||||
|
|
||||||
CleanedOutSNPROD cleanedSNP = (CleanedOutSNPROD)tracker.lookup("cleaned", null);
|
|
||||||
if ( cleanedSNP != null && !cleanedSNP.isRealSNP() ) {
|
|
||||||
exclude = true;
|
|
||||||
source = "Cleaner";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Variation indelCall = (Variation)tracker.lookup("indels", null);
|
|
||||||
if ( indelCall != null ) {
|
|
||||||
exclude = true;
|
|
||||||
source = "IndelCall";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null);
|
|
||||||
if ( dbsnp != null && dbsnp.isIndel() ) {
|
|
||||||
exclude = true;
|
|
||||||
source = "dbSNP";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
exclude = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "IndelArtifact\tSource";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + source;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "InDel";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return exclude ? "1" : "0";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class VECLodThreshold implements VariantExclusionCriterion {
|
|
||||||
private double lodThreshold = 5.0;
|
|
||||||
private double lod;
|
|
||||||
private boolean exclude;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("lod") != null )
|
|
||||||
lodThreshold = Double.valueOf(args.get("lod"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
lod = context.getVariant().getLodBtr();
|
|
||||||
exclude = lod < lodThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "LodThreshold("+lodThreshold+")\tlod";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + lod;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "LOD";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%.2f",lod);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class VECLodThresholdByCoverage implements VariantExclusionCriterion {
|
|
||||||
private double slope = 0.46;
|
|
||||||
private double depth;
|
|
||||||
private double lod;
|
|
||||||
private boolean exclude;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String, String> arguments) {
|
|
||||||
if (arguments.get("slope") != null) {
|
|
||||||
slope = Double.valueOf(arguments.get("slope"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
depth = (double) contextWindow.getContext().getVariant().getPileupDepth();
|
|
||||||
lod = contextWindow.getContext().getVariant().getLodBtr();
|
|
||||||
|
|
||||||
exclude = (lod < slope*depth);
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return String.format("LodThresholdByCoverage(%f)\tdepth\tlod", slope);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "LODbyDoC";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return exclude ? "1" : "0";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class VECMappingQuality implements VariantExclusionCriterion {
|
|
||||||
private double minQuality = 5.0;
|
|
||||||
private double rms;
|
|
||||||
private boolean exclude;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("min") != null )
|
|
||||||
minQuality = Double.valueOf(args.get("min"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
|
|
||||||
int[] qualities = new int[reads.size()];
|
|
||||||
for (int i=0; i < reads.size(); i++)
|
|
||||||
qualities[i] = reads.get(i).getMappingQuality();
|
|
||||||
rms = MathUtils.rms(qualities);
|
|
||||||
exclude = rms < minQuality;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "MappingQuality("+minQuality+")\trms";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + rms;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "MQ";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%.2f", rms);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class VECMappingQualityZero implements VariantExclusionCriterion {
|
|
||||||
private int maximum = 50;
|
|
||||||
private int mq0Count;
|
|
||||||
private boolean exclude;
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("max") != null )
|
|
||||||
maximum = Integer.valueOf(args.get("max"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {
|
|
||||||
VariantContext context = contextWindow.getContext();
|
|
||||||
List<SAMRecord> reads = context.getAlignmentContext(useZeroQualityReads()).getReads();
|
|
||||||
mq0Count = 0;
|
|
||||||
for (int i=0; i < reads.size(); i++) {
|
|
||||||
if ( reads.get(i).getMappingQuality() == 0 )
|
|
||||||
mq0Count++;
|
|
||||||
}
|
|
||||||
exclude = mq0Count > maximum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "MappingQualityZero("+maximum+")\tMQ0_count";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + mq0Count;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "MQzero";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%d", mq0Count);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
|
||||||
}
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class VECNull implements VariantExclusionCriterion {
|
|
||||||
public void initialize(HashMap<String,String> arguments) {}
|
|
||||||
|
|
||||||
public void compute(VariantContextWindow contextWindow) {}
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
// A hack for now until this filter is actually converted to an empirical filter
|
|
||||||
return 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class VECOnOffGenotypeRatio extends RatioFilter {
|
|
||||||
private double threshold = 0.8;
|
|
||||||
private double ratio;
|
|
||||||
|
|
||||||
public VECOnOffGenotypeRatio() {
|
|
||||||
super("On/Off Genotype Ratio", VECOnOffGenotypeRatio.class, Tail.LeftTailed);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initialize(HashMap<String,String> args) {
|
|
||||||
if ( args.get("analysis") != null ) {
|
|
||||||
if ( args.get("analysis").equalsIgnoreCase("fair_coin_test") )
|
|
||||||
analysis = AnalysisType.FAIR_COIN_TEST;
|
|
||||||
else if ( args.get("analysis").equalsIgnoreCase("point_estimate") )
|
|
||||||
analysis = AnalysisType.POINT_ESTIMATE;
|
|
||||||
}
|
|
||||||
if ( args.get("pvalue") != null )
|
|
||||||
setIntegralPvalue(Double.valueOf(args.get("pvalue")));
|
|
||||||
if ( args.get("threshold") != null )
|
|
||||||
threshold = Double.valueOf(args.get("threshold"));
|
|
||||||
if ( args.get("confidence") != null )
|
|
||||||
minGenotypeConfidenceToTest = Double.valueOf(args.get("confidence"));
|
|
||||||
setLowThreshold(threshold);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the counts of bases that are on (matching the bestGenotype) and off (not matching the
|
|
||||||
* best genotype). On are in the first field, off in the second.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
protected Pair<Integer, Integer> getRatioCounts(char ref, ReadBackedPileup pileup, RodGeliText variant) {
|
|
||||||
final String genotype = variant.getBestGenotype().toUpperCase();
|
|
||||||
if ( genotype.length() > 2 )
|
|
||||||
throw new IllegalArgumentException(String.format("Can only handle diploid genotypes: %s", genotype));
|
|
||||||
|
|
||||||
final String bases = pileup.getBasesAsString();
|
|
||||||
if ( bases.length() == 0 ) {
|
|
||||||
ratio = 0.0;
|
|
||||||
return new Pair<Integer, Integer>(0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
int on = 0, off = 0;
|
|
||||||
for ( char base : BaseUtils.BASES ) {
|
|
||||||
int count = BasicPileup.countBase(base, bases);
|
|
||||||
if ( Utils.countOccurrences(base, genotype) > 0 )
|
|
||||||
on += count;
|
|
||||||
else
|
|
||||||
off += count;
|
|
||||||
//System.out.printf("count = %d, on=%d, off=%d for %c in %s%n", count, on, off, base, genotype);
|
|
||||||
}
|
|
||||||
|
|
||||||
ratio = (double)on / (double)(on + off);
|
|
||||||
return new Pair<Integer, Integer>(on, off);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected boolean excludeHetsOnly() { return false; }
|
|
||||||
|
|
||||||
public double inclusionProbability() {
|
|
||||||
return exclude ? 0.0 : 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
|
||||||
|
|
||||||
public String getStudyHeader() {
|
|
||||||
return "OnOffGenotype("+threshold+")\tOnRatio";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStudyInfo() {
|
|
||||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getVCFFilterString() {
|
|
||||||
return "onOffGenotype";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getScoreString() {
|
|
||||||
return String.format("%.2f", ratio);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.VariantContext;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -41,14 +41,14 @@ public class VariantContextWindow {
|
||||||
/**
|
/**
|
||||||
* The variants.
|
* The variants.
|
||||||
*/
|
*/
|
||||||
private LinkedList<VariantContext> window = new LinkedList<VariantContext>();
|
private LinkedList<Pair<RefMetaDataTracker, RodVCF>> window = new LinkedList<Pair<RefMetaDataTracker, RodVCF>>();
|
||||||
private int currentContext;
|
private int currentContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contructor for a variant context.
|
* Contructor for a variant context.
|
||||||
* @param firstVariants the first set of variants, comprising the right half of the window
|
* @param firstVariants the first set of variants, comprising the right half of the window
|
||||||
*/
|
*/
|
||||||
public VariantContextWindow(List<VariantContext> firstVariants) {
|
public VariantContextWindow(List<Pair<RefMetaDataTracker, RodVCF>> firstVariants) {
|
||||||
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
|
int windowSize = (firstVariants == null ? 1 : 2 * firstVariants.size() + 1);
|
||||||
currentContext = (firstVariants == null ? 0 : firstVariants.size());
|
currentContext = (firstVariants == null ? 0 : firstVariants.size());
|
||||||
window.addAll(firstVariants);
|
window.addAll(firstVariants);
|
||||||
|
|
@ -60,7 +60,7 @@ public class VariantContextWindow {
|
||||||
* The context currently being examined.
|
* The context currently being examined.
|
||||||
* @return The current context.
|
* @return The current context.
|
||||||
*/
|
*/
|
||||||
public VariantContext getContext() {
|
public Pair<RefMetaDataTracker, RodVCF> getContext() {
|
||||||
return window.get(currentContext);
|
return window.get(currentContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,17 +75,17 @@ public class VariantContextWindow {
|
||||||
/**
|
/**
|
||||||
* The window around the context currently being examined.
|
* The window around the context currently being examined.
|
||||||
* @param elementsToLeft number of earlier contexts to return ()
|
* @param elementsToLeft number of earlier contexts to return ()
|
||||||
* @param elementsToLeft number of later contexts to return ()
|
* @param elementsToRight number of later contexts to return ()
|
||||||
* @return The current context window.
|
* @return The current context window.
|
||||||
*/
|
*/
|
||||||
public VariantContext[] getWindow(int elementsToLeft, int elementsToRight) {
|
public Pair<RefMetaDataTracker, RodVCF>[] getWindow(int elementsToLeft, int elementsToRight) {
|
||||||
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
if ( elementsToLeft > maxWindowElements() || elementsToRight > maxWindowElements() )
|
||||||
throw new StingException("Too large a window requested");
|
throw new StingException("Too large a window requested");
|
||||||
if ( elementsToLeft < 0 || elementsToRight < 0 )
|
if ( elementsToLeft < 0 || elementsToRight < 0 )
|
||||||
throw new StingException("Window size cannot be negative");
|
throw new StingException("Window size cannot be negative");
|
||||||
|
|
||||||
VariantContext[] array = new VariantContext[elementsToLeft + elementsToRight + 1];
|
Pair[] array = new Pair[elementsToLeft + elementsToRight + 1];
|
||||||
ListIterator<VariantContext> iter = window.listIterator(currentContext - elementsToLeft);
|
ListIterator<Pair<RefMetaDataTracker, RodVCF>> iter = window.listIterator(currentContext - elementsToLeft);
|
||||||
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
|
for (int i = 0; i < elementsToLeft + elementsToRight + 1; i++)
|
||||||
array[i] = iter.next();
|
array[i] = iter.next();
|
||||||
return array;
|
return array;
|
||||||
|
|
@ -95,7 +95,7 @@ public class VariantContextWindow {
|
||||||
* Move the window along to the next context
|
* Move the window along to the next context
|
||||||
* @param context The new rightmost context
|
* @param context The new rightmost context
|
||||||
*/
|
*/
|
||||||
public void moveWindow(VariantContext context) {
|
public void moveWindow(Pair<RefMetaDataTracker, RodVCF> context) {
|
||||||
window.removeFirst();
|
window.removeFirst();
|
||||||
window.addLast(context);
|
window.addLast(context);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public interface VariantExclusionCriterion {
|
|
||||||
public void initialize(HashMap<String,String> arguments);
|
|
||||||
public void compute(VariantContextWindow contextWindow);
|
|
||||||
|
|
||||||
//public boolean isExcludable();
|
|
||||||
public double inclusionProbability();
|
|
||||||
|
|
||||||
public String getStudyHeader();
|
|
||||||
public String getStudyInfo();
|
|
||||||
public String getVCFFilterString();
|
|
||||||
public String getScoreString();
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads();
|
|
||||||
}
|
|
||||||
|
|
@ -4,209 +4,68 @@ import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF;
|
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.*;
|
import org.apache.commons.jexl.*;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* VariantFiltrationWalker applies specified conditionally independent features and filters to pre-called variants.
|
* VariantFiltrationWalker filters variant calls in VCF format.
|
||||||
* The former modifiesthe likelihoods of each genotype, while the latter makes a decision to include or exclude a
|
|
||||||
* variant outright. At the moment, the variants are expected to be in gelitext format.
|
|
||||||
*/
|
*/
|
||||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
|
||||||
//@Allows(value={DataSource.READS, DataSource.REFERENCE}, referenceMetaData={@RMD(name="variant",type= RodGeliText.class), @RMD(name="variantvcf",type= RodVCF.class), @RMD(name="dbsnp",type=rodDbSNP.class), @RMD(name="interval",type= IntervalRod.class)})
|
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
||||||
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
|
protected String FILTER_STRING = null;
|
||||||
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
|
@Argument(fullName="filterName", shortName="filterName", doc="The text to put in the FILTER field if a filter expression is provided and a variant call matches", required=false)
|
||||||
@Argument(fullName="includedOutput", shortName="included", doc="File to which all variants passing filters should be written", required=true) public String INCLUDED_OUT;
|
protected String FILTER_NAME = "GATK_filter";
|
||||||
@Argument(fullName="annotatedOutput", shortName="annotated", doc="File to which all variants should be written with annotations - for debugging/parameterizing", required=false) public String ANNOTATED_OUT;
|
|
||||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
|
||||||
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS;
|
|
||||||
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
|
||||||
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
|
||||||
@Argument(fullName="onlyAnnotate", shortName="oa", doc="If provided, do not write output into the FILTER field, but only annotate the VCF") public boolean onlyAnnotate = false;
|
|
||||||
|
|
||||||
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize)", required=false)
|
||||||
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
protected Integer clusterSize = 3;
|
||||||
|
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1)", required=false)
|
||||||
|
protected Integer clusterWindow = 0;
|
||||||
|
|
||||||
private VCFWriter vcfWriter;
|
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
|
||||||
private VCFHeader vcfHeader;
|
protected String MASK_NAME = "Mask";
|
||||||
private PrintWriter annotatedWriter;
|
|
||||||
private PrintWriter includedWriter;
|
|
||||||
private HashMap<String, String> sampleNames = new HashMap<String, String>();
|
|
||||||
|
|
||||||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
private VCFWriter writer = null;
|
||||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
|
||||||
|
private ClusteredSnps clusteredSNPs = null;
|
||||||
|
|
||||||
|
private Expression filterExpression = null;
|
||||||
|
|
||||||
// the structures necessary to initialize and maintain a windowed context
|
// the structures necessary to initialize and maintain a windowed context
|
||||||
private VariantContextWindow variantContextWindow;
|
private VariantContextWindow variantContextWindow;
|
||||||
private static final int windowSize = 10; // 10 variants on either end of the current one
|
private static final int windowSize = 10; // 10 variants on either end of the current one
|
||||||
private ArrayList<VariantContext> windowInitializer = new ArrayList<VariantContext>();
|
private ArrayList<Pair<RefMetaDataTracker, RodVCF>> windowInitializer = new ArrayList<Pair<RefMetaDataTracker, RodVCF>>();
|
||||||
|
|
||||||
private void listFiltersAndExit() {
|
|
||||||
out.println("\nAvailable features: " + getAvailableClasses(featureClasses));
|
private void initializeVcfWriter(RodVCF rod) {
|
||||||
out.println("Available exclusion criteria: " + getAvailableClasses(exclusionClasses) + "\n");
|
// setup the header fields
|
||||||
System.exit(0);
|
Map<String, String> hInfo = new HashMap<String, String>();
|
||||||
|
hInfo.put("format", VCFWriter.VERSION);
|
||||||
|
hInfo.put("source", "VariantFiltration");
|
||||||
|
hInfo.put("reference", this.getToolkit().getArguments().referenceFile.getName());
|
||||||
|
|
||||||
|
VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples());
|
||||||
|
writer = new VCFWriter(header, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Prepare the output file and the list of available features.
|
|
||||||
*/
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
featureClasses = PackageUtils.getClassesImplementingInterface(IndependentVariantFeature.class);
|
if ( clusterWindow > 0 )
|
||||||
exclusionClasses = PackageUtils.getClassesImplementingInterface(VariantExclusionCriterion.class);
|
clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow);
|
||||||
|
|
||||||
if (LIST) { listFiltersAndExit(); }
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
sampleNames.put(sampleName.toUpperCase(), "variant");
|
if ( FILTER_STRING != null )
|
||||||
vcfHeader = VariantsToVCF.getHeader(this.getToolkit().getArguments(), sampleNames.keySet());
|
filterExpression = ExpressionFactory.createExpression(FILTER_STRING);
|
||||||
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
|
} catch (Exception e) {
|
||||||
includedWriter = new PrintWriter(INCLUDED_OUT);
|
throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax.");
|
||||||
includedWriter.println(GeliTextWriter.headerLine);
|
|
||||||
if ( ANNOTATED_OUT != null ) {
|
|
||||||
annotatedWriter = new PrintWriter(ANNOTATED_OUT);
|
|
||||||
annotatedWriter.print("Chr\tPosition\t");
|
|
||||||
}
|
|
||||||
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
|
||||||
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
|
||||||
|
|
||||||
// Initialize requested features
|
|
||||||
if (FEATURES != null) {
|
|
||||||
for (String requestedFeatureString : FEATURES) {
|
|
||||||
String[] requestedFeaturePieces = requestedFeatureString.split(":");
|
|
||||||
String requestedFeatureName = requestedFeaturePieces[0];
|
|
||||||
String requestedFeatureArgs = (requestedFeaturePieces.length == 2) ? requestedFeaturePieces[1] : "";
|
|
||||||
|
|
||||||
boolean found = false;
|
|
||||||
for ( Class featureClass : featureClasses ) {
|
|
||||||
String featureClassName = rationalizeClassName(featureClass);
|
|
||||||
|
|
||||||
if (requestedFeatureName.equalsIgnoreCase(featureClassName)) {
|
|
||||||
found = true;
|
|
||||||
|
|
||||||
try {
|
|
||||||
IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance();
|
|
||||||
ivf.initialize(requestedFeatureArgs);
|
|
||||||
requestedFeatures.add(ivf);
|
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
|
||||||
annotatedWriter.print(ivf.getStudyHeader() + "\t");
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must have no-arg constructor", featureClass.getSimpleName()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found) {
|
|
||||||
throw new StingException("Unknown feature '" + requestedFeatureString + "'. Issue the '-ls' argument to list available features.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (EXCLUSIONS != null) {
|
|
||||||
for (String requestedExclusionString : EXCLUSIONS) {
|
|
||||||
String[] requestedExclusionPieces = requestedExclusionString.split(":");
|
|
||||||
String requestedExclusionName = requestedExclusionPieces[0];
|
|
||||||
|
|
||||||
boolean found = false;
|
|
||||||
for ( Class exclusionClass : exclusionClasses ) {
|
|
||||||
String exclusionClassName = rationalizeClassName(exclusionClass);
|
|
||||||
|
|
||||||
if (requestedExclusionName.equalsIgnoreCase(exclusionClassName)) {
|
|
||||||
found = true;
|
|
||||||
|
|
||||||
try {
|
|
||||||
HashMap<String,String> requestedArgs = new HashMap<String,String>();
|
|
||||||
if ( requestedExclusionPieces.length == 2 ) {
|
|
||||||
String[] argStrings = requestedExclusionPieces[1].split(",");
|
|
||||||
for (int i = 0; i < argStrings.length; i++ ) {
|
|
||||||
String[] arg = argStrings[i].split("=");
|
|
||||||
if ( arg.length == 2 )
|
|
||||||
requestedArgs.put(arg[0], arg[1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance();
|
|
||||||
vec.initialize(requestedArgs);
|
|
||||||
requestedExclusions.add(vec);
|
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
|
||||||
annotatedWriter.print(vec.getStudyHeader() + "\t");
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must have no-arg constructor", exclusionClass.getSimpleName()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found) {
|
|
||||||
throw new StingException("Unknown exclusion criterion '" + requestedExclusionString + "'. Issue the '-ls' argument to list available exclusion criteria.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
|
||||||
annotatedWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Trim the 'IVF' or 'VEC' off the feature/exclusion name so the user needn't specify that on the command-line.
|
|
||||||
*
|
|
||||||
* @param featureClass the feature class whose name we should rationalize
|
|
||||||
* @return the class name, minus 'IVF'
|
|
||||||
*/
|
|
||||||
private String rationalizeClassName(Class featureClass) {
|
|
||||||
String featureClassName = featureClass.getSimpleName();
|
|
||||||
String newName = featureClassName.replaceFirst("IVF", "");
|
|
||||||
newName = newName.replaceFirst("VEC", "");
|
|
||||||
return newName;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a comma-separated list of available classes the user may specify at the command-line.
|
|
||||||
*
|
|
||||||
* @param classes an ArrayList of classes
|
|
||||||
* @return String of available classes
|
|
||||||
*/
|
|
||||||
private <T> String getAvailableClasses(List<Class<? extends T>> classes) {
|
|
||||||
String availableString = "";
|
|
||||||
|
|
||||||
for (int classIndex = 0; classIndex < classes.size(); classIndex++) {
|
|
||||||
availableString += rationalizeClassName(classes.get(classIndex)) + (classIndex == classes.size() - 1 ? "" : ",");
|
|
||||||
}
|
|
||||||
|
|
||||||
return availableString;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize the number of loci processed to zero.
|
|
||||||
*
|
|
||||||
* @return 0
|
|
||||||
*/
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* We want reads that span deletions
|
|
||||||
*
|
|
||||||
* @return true
|
|
||||||
*/
|
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
|
* For each site of interest, rescore the genotype likelihoods by applying the specified feature set.
|
||||||
*
|
*
|
||||||
|
|
@ -216,14 +75,16 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
* @return 1 if the locus was successfully processed, 0 if otherwise
|
* @return 1 if the locus was successfully processed, 0 if otherwise
|
||||||
*/
|
*/
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
RodGeliText variant = (RodGeliText) tracker.lookup("variant", null);
|
if ( tracker == null )
|
||||||
|
|
||||||
// Ignore places where we don't have a variant or where the reference base is ambiguous.
|
|
||||||
if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 )
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
RodVCF variantVCF = (RodVCF) tracker.lookup("variantVCF", null);
|
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("variant", null);
|
||||||
VariantContext varContext = new VariantContext(tracker, ref, context, variant, variantVCF);
|
// ignore places where we don't have a variant
|
||||||
|
if ( rods == null || rods.getRecords().size() == 0 )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
RodVCF variant = (RodVCF)rods.getRecords().get(0);
|
||||||
|
Pair<RefMetaDataTracker, RodVCF> varContext = new Pair<RefMetaDataTracker, RodVCF>(tracker, variant);
|
||||||
|
|
||||||
// if we're still initializing the context, do so
|
// if we're still initializing the context, do so
|
||||||
if ( windowInitializer != null ) {
|
if ( windowInitializer != null ) {
|
||||||
|
|
@ -234,117 +95,67 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
variantContextWindow.moveWindow(varContext);
|
variantContextWindow.moveWindow(varContext);
|
||||||
compute();
|
filter();
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void compute() {
|
private void filter() {
|
||||||
// get the current context
|
// get the current context
|
||||||
VariantContext context = variantContextWindow.getContext();
|
Pair<RefMetaDataTracker, RodVCF> context = variantContextWindow.getContext();
|
||||||
if ( context == null )
|
if ( context == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
StringBuilder filterString = new StringBuilder();
|
||||||
RodGeliText variant = context.getVariant();
|
|
||||||
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
|
||||||
|
|
||||||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
// test for SNP mask, if present
|
||||||
|
RODRecordList<ReferenceOrderedDatum> mask = context.first.getTrackData("mask", null);
|
||||||
|
if ( mask != null && mask.getRecords().size() > 0 )
|
||||||
|
addFilter(filterString, MASK_NAME);
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
// test for clustered SNPs if requested
|
||||||
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
||||||
|
addFilter(filterString, "SnpCluster");
|
||||||
|
|
||||||
// Apply features that modify the likelihoods and LOD scores
|
if ( filterExpression != null ) {
|
||||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
Map<String, String> infoMap = context.second.mCurrentRecord.getInfoValues();
|
||||||
ivf.compute(variantContextWindow);
|
|
||||||
double[] weights = ivf.getLikelihoods();
|
|
||||||
variant.adjustLikelihoods(weights);
|
|
||||||
|
|
||||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
JexlContext jContext = JexlHelper.createContext();
|
||||||
|
jContext.setVars(infoMap);
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
try {
|
||||||
annotatedWriter.print(ivf.getStudyInfo() + "\t");
|
if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||||
}
|
addFilter(filterString, FILTER_NAME);
|
||||||
|
} catch (Exception e) {
|
||||||
// Apply exclusion tests that score the variant call
|
throw new StingException(e.getMessage());
|
||||||
if (VERBOSE) {
|
|
||||||
out.print("InclusionProbabilities:[");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use the filters to score the variant
|
|
||||||
String filterFailureString = "";
|
|
||||||
HashMap<String,String> filterINFOString = new HashMap<String,String>();
|
|
||||||
double jointInclusionProbability = 1.0;
|
|
||||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
|
||||||
vec.compute(variantContextWindow);
|
|
||||||
|
|
||||||
String exclusionClassName = rationalizeClassName(vec.getClass());
|
|
||||||
|
|
||||||
Double inclusionProbability = vec.inclusionProbability();
|
|
||||||
jointInclusionProbability *= inclusionProbability;
|
|
||||||
exclusionResults.put(exclusionClassName, inclusionProbability);
|
|
||||||
|
|
||||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
|
||||||
filterFailureString += vec.getVCFFilterString() + ";";
|
|
||||||
}
|
|
||||||
filterINFOString.put(vec.getVCFFilterString(), vec.getScoreString());
|
|
||||||
|
|
||||||
if (VERBOSE) {
|
|
||||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( annotatedWriter != null )
|
|
||||||
annotatedWriter.print(vec.getStudyInfo() + "\t");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decide whether we should keep the call or not
|
|
||||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
|
||||||
includedWriter.println(variant);
|
|
||||||
}
|
|
||||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
|
||||||
|
|
||||||
if ( annotatedWriter != null ) {
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
|
||||||
if ( dbsnp == null )
|
|
||||||
annotatedWriter.print("false\tfalse\t");
|
|
||||||
else
|
|
||||||
annotatedWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
|
||||||
annotatedWriter.println(GenotypeUtils.isHet(variant));
|
|
||||||
}
|
|
||||||
|
|
||||||
writeVCF(context, filterFailureString, filterINFOString);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private void writeVCF(VariantContext context, final String filterFailureString, HashMap<String,String> filterINFOString) {
|
|
||||||
VCFRecord rec = getVCFRecord(context);
|
|
||||||
if ( rec != null ) {
|
|
||||||
rec.addInfoFields(filterINFOString);
|
|
||||||
if ( ! onlyAnnotate && !filterFailureString.equals("") )
|
|
||||||
rec.setFilterString(filterFailureString);
|
|
||||||
vcfWriter.addRecord(rec);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
writeVCF(context.second, filterString);
|
||||||
private VCFRecord getVCFRecord(VariantContext context) {
|
}
|
||||||
if ( context.getVariantVCF() != null ) {
|
|
||||||
return context.getVariantVCF().mCurrentRecord;
|
private static void addFilter(StringBuilder sb, String filter) {
|
||||||
} else {
|
if ( sb.length() > 0 )
|
||||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
sb.append(";");
|
||||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
sb.append(filter);
|
||||||
return VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false);
|
}
|
||||||
}
|
|
||||||
|
private void writeVCF(RodVCF variant, StringBuilder filterString) {
|
||||||
|
VCFRecord rec = variant.mCurrentRecord;
|
||||||
|
|
||||||
|
if ( filterString.length() != 0 ) {
|
||||||
|
// if the record is already filtered, don't destroy those filters
|
||||||
|
if ( rec.isFiltered() )
|
||||||
|
filterString.append(";" + rec.getFilterString());
|
||||||
|
rec.setFilterString(filterString.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( writer == null )
|
||||||
|
initializeVcfWriter(variant);
|
||||||
|
writer.addRecord(rec);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Increment the number of loci processed.
|
|
||||||
*
|
|
||||||
* @param value result of the map.
|
|
||||||
* @param sum accumulator for the reduce.
|
|
||||||
* @return the new number of loci processed.
|
|
||||||
*/
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return sum + value;
|
return sum + value;
|
||||||
}
|
}
|
||||||
|
|
@ -363,14 +174,12 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
for (int i=0; i < windowSize; i++) {
|
for (int i=0; i < windowSize; i++) {
|
||||||
variantContextWindow.moveWindow(null);
|
variantContextWindow.moveWindow(null);
|
||||||
compute();
|
filter();
|
||||||
}
|
}
|
||||||
|
|
||||||
out.printf("Processed %d loci.\n", result);
|
out.printf("Processed %d loci.\n", result);
|
||||||
|
|
||||||
vcfWriter.close();
|
if ( writer != null )
|
||||||
includedWriter.close();
|
writer.close();
|
||||||
if ( annotatedWriter != null )
|
|
||||||
annotatedWriter.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue