Allow filters to specify whether they want to work with mapping-quality-zero reads; the VariantFiltrationWalker passes each filter the appropriate read context

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1333 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-29 17:38:15 +00:00
parent 7a13647c35
commit 3554897222
6 changed files with 39 additions and 2 deletions

View File

@ -254,6 +254,8 @@ public abstract class RatioFilter implements VariantExclusionCriterion {
protected abstract boolean applyToVariant(rodVariants variant); protected abstract boolean applyToVariant(rodVariants variant);
protected abstract Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant); protected abstract Pair<Integer, Integer> scoreVariant(char ref, ReadBackedPileup pileup, rodVariants variant);
// Returns false: this filter does not ask for mapping quality zero reads.
public boolean useZeroQualityReads() { return false; }
public boolean exclude(char ref, LocusContext context, rodVariants variant) { public boolean exclude(char ref, LocusContext context, rodVariants variant) {
boolean exclude = false; boolean exclude = false;

View File

@ -19,6 +19,8 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
} }
} }
// Returns false: this criterion does not ask for mapping quality zero reads.
public boolean useZeroQualityReads() { return false; }
public boolean exclude(char ref, LocusContext context, rodVariants variant) { public boolean exclude(char ref, LocusContext context, rodVariants variant) {
return context.getReads().size() > maximum; return context.getReads().size() > maximum;
} }

View File

@ -30,6 +30,8 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return false; return false;
} }
// Returns false: this criterion does not ask for mapping quality zero reads.
public boolean useZeroQualityReads() { return false; }
public static boolean strandTest(char ref, LocusContext context, int allele1, int allele2, double threshold, StringBuffer out) { public static boolean strandTest(char ref, LocusContext context, int allele1, int allele2, double threshold, StringBuffer out) {
int[][] table = getContingencyTable(context, allele1, allele2); int[][] table = getContingencyTable(context, allele1, allele2);
if ( !variantIsHet(table) ) if ( !variantIsHet(table) )
@ -81,7 +83,7 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0)); return ((table[0][1] != 0 || table[0][1] != 0) && (table[1][0] != 0 || table[1][1] != 0));
} }
private void printTable(int[][] table, double pValue) { private static void printTable(int[][] table, double pValue) {
System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue); System.out.printf("%d %d; %d %d : %f\n", table[0][0], table[0][1], table[1][0], table[1][1], pValue);
} }

View File

@ -12,6 +12,8 @@ public class VECLodThreshold implements VariantExclusionCriterion {
} }
} }
// Returns false: this criterion does not ask for mapping quality zero reads.
public boolean useZeroQualityReads() { return false; }
public boolean exclude(char ref, LocusContext context, rodVariants variant) { public boolean exclude(char ref, LocusContext context, rodVariants variant) {
return (variant.getLodBtr() < lodThreshold); return (variant.getLodBtr() < lodThreshold);
} }

View File

@ -7,4 +7,5 @@ public interface VariantExclusionCriterion {
public void initialize(String arguments); public void initialize(String arguments);
public boolean exclude(char ref, LocusContext context, rodVariants variant); public boolean exclude(char ref, LocusContext context, rodVariants variant);
/**
 * Tells the caller which reads this criterion wants to see in the context given to exclude().
 *
 * @return true to receive the context including mapping quality zero reads;
 *         false to receive a context from which those reads have been removed
 */
public boolean useZeroQualityReads();
} }

View File

@ -18,6 +18,8 @@ import java.io.FileNotFoundException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.*; import java.util.*;
import net.sf.samtools.SAMRecord;
/** /**
* VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying * VariantFiltrationWalker applies specified conditionally independent features to pre-called variants, thus modifying
* the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format. * the likelihoods of each genotype. At the moment, the variants are expected to be in gelitext format.
@ -191,8 +193,12 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
// Apply exclusion tests that accept or reject the variant call // Apply exclusion tests that accept or reject the variant call
ArrayList<String> exclusionResults = new ArrayList<String>(); ArrayList<String> exclusionResults = new ArrayList<String>();
// we need to provide an alternative context without mapping quality 0 reads
// for those exclusion criteria that don't want them
LocusContext Q0freeContext = removeQ0reads(context);
for ( VariantExclusionCriterion vec : requestedExclusions ) { for ( VariantExclusionCriterion vec : requestedExclusions ) {
boolean excludeResult = vec.exclude(ref, context, variant); boolean excludeResult = vec.exclude(ref, (vec.useZeroQualityReads() ? context : Q0freeContext), variant);
if (excludeResult) { if (excludeResult) {
exclusionResults.add(rationalizeClassName(vec.getClass())); exclusionResults.add(rationalizeClassName(vec.getClass()));
@ -223,6 +229,28 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
return 0; return 0;
} }
/**
 * Builds a new locus context at the same location that keeps only the reads
 * whose mapping quality is greater than zero, together with their offsets.
 *
 * @param context the locus context to filter; it is read but not modified here
 * @return a new LocusContext holding the surviving reads and their offsets
 */
private LocusContext removeQ0reads(LocusContext context) {
    List<SAMRecord> allReads = context.getReads();
    List<Integer> allOffsets = context.getOffsets();

    ArrayList<SAMRecord> keptReads = new ArrayList<SAMRecord>();
    ArrayList<Integer> keptOffsets = new ArrayList<Integer>();

    // offsets are consumed in lockstep with the reads, one offset per read visited
    Iterator<Integer> offsetCursor = allOffsets.iterator();
    for ( SAMRecord candidate : allReads ) {
        Integer candidateOffset = offsetCursor.next();

        // skip reads with mapping quality zero
        if ( candidate.getMappingQuality() <= 0 )
            continue;

        keptReads.add(candidate);
        keptOffsets.add(candidateOffset);
    }

    return new LocusContext(context.getLocation(), keptReads, keptOffsets);
}
/** /**
* Increment the number of loci processed. * Increment the number of loci processed.
* *