-Output single filtration stats file with input from all filters

-move out isHet test to GenotypeUtils so all can use it


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1369 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-08-03 20:44:21 +00:00
parent d88ea91939
commit 9f1d3aed26
5 changed files with 59 additions and 65 deletions

View File

@ -2,8 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.*;
import java.util.List;
import java.util.ArrayList;
@ -53,7 +52,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
truthIndex = UNKNOWN;
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
truthIndex = REF;
else if ( isHet(chip) )
else if ( GenotypeUtils.isHet(chip) )
truthIndex = VAR_HET;
else
truthIndex = VAR_HOM;
@ -63,7 +62,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
callIndex = NO_CALL;
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
callIndex = REF;
else if ( isHet(eval) )
else if ( GenotypeUtils.isHet(eval) )
callIndex = VAR_HET;
else
callIndex = VAR_HOM;
@ -163,15 +162,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
sb.append("%");
return sb.toString();
}
private static boolean isHet(AllelicVariant var) {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
List<String> genotype = var.getGenotype();
if ( genotype.size() < 1 )
return false;
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
}
}

View File

@ -60,8 +60,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
public void compute(char ref, LocusContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
ratio = (double)counts.first / (double)counts.second;
exclude = ratio < lowThreshold || ratio > highThreshold;
int n = counts.first + counts.second;
ratio = counts.first.doubleValue() / (double)n;
exclude = (1.0 - ratio) < lowThreshold || ratio > highThreshold;
}
public boolean useZeroQualityReads() { return false; }
@ -71,7 +72,7 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
}
public String getStudyHeader() {
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorRatio";
}
public String getStudyInfo() {

View File

@ -7,13 +7,11 @@ import org.broadinstitute.sting.utils.*;
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
//final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
private boolean exclude;
private double lowThreshold, highThreshold, ratio;
private double threshold, ratio;
public void initialize(String arguments) {
if (arguments != null && !arguments.isEmpty()) {
String[] argPieces = arguments.split(",");
lowThreshold = Double.valueOf(argPieces[0]);
highThreshold = Double.valueOf(argPieces[1]);
threshold = Double.valueOf(arguments);
}
}
@ -60,8 +58,9 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
public void compute(char ref, LocusContext context, rodVariants variant) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
ratio = (double)counts.first / (double)counts.second;
exclude = ratio < lowThreshold || ratio > highThreshold;
int n = counts.first + counts.second;
ratio = counts.first.doubleValue() / (double)n;
exclude = ratio < threshold;
}
public boolean useZeroQualityReads() { return false; }
@ -71,7 +70,7 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
}
public String getStudyHeader() {
return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
return "OnOffGenotype("+threshold+")\tOnRatio";
}
public String getStudyInfo() {

View File

@ -2,18 +2,11 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.gatk.walkers.variants.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;
@ -41,6 +34,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
private PrintWriter vwriter;
private HashMap<String, PrintWriter> ewriters;
private HashMap<String, PrintWriter> swriters;
private final String STUDY_NAME = "study";
private final String knownSNPDBName = "dbSNP";
private ArrayList<IndependentVariantFeature> requestedFeatures;
private ArrayList<VariantExclusionCriterion> requestedExclusions;
@ -62,11 +57,17 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
vwriter.println(AlleleFrequencyEstimate.geliHeaderString());
swriters = new HashMap<String, PrintWriter>();
if (LEARNING) {
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME);
swriters.put(STUDY_NAME, studyWriter);
studyWriter.print("#Chr\tPosition\t");
}
requestedFeatures = new ArrayList<IndependentVariantFeature>();
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
swriters = new HashMap<String, PrintWriter>();
// Initialize requested features
if (FEATURES != null) {
for (String requestedFeatureString : FEATURES) {
@ -81,15 +82,10 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
try {
IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance();
ivf.initialize(requestedFeatureArgs);
if (LEARNING) {
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + featureClassName + ".study");
studyWriter.println(ivf.getStudyHeader());
swriters.put(featureClassName, studyWriter);
}
requestedFeatures.add(ivf);
if (LEARNING)
swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t");
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) {
@ -116,16 +112,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
try {
VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance();
vec.initialize(requestedExclusionArgs);
if (LEARNING) {
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + exclusionClassName + ".study");
studyWriter.println(vec.getStudyHeader());
swriters.put(exclusionClassName, studyWriter);
}
requestedExclusions.add(vec);
if (LEARNING)
swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t");
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
writer.println(AlleleFrequencyEstimate.geliHeaderString());
@ -139,6 +130,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
}
}
swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n");
} catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing"));
}
@ -203,12 +196,14 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
if (VERBOSE) { out.println("Original:\n " + variant); }
if (LEARNING) {
swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
}
// Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) {
ivf.compute(ref, context);
String featureClassName = rationalizeClassName(ivf.getClass());
double[] weights = ivf.getLikelihoods();
variant.adjustLikelihoods(weights);
@ -216,10 +211,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
if (LEARNING) {
PrintWriter swriter = swriters.get(featureClassName);
if (swriter != null) {
swriter.println(ivf.getStudyInfo());
}
swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t");
}
}
@ -240,12 +232,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
}
if (LEARNING) {
PrintWriter swriter = swriters.get(exclusionClassName);
if (swriter != null) {
swriter.println(vec.getStudyInfo());
swriter.flush();
}
swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t");
}
}
@ -267,6 +254,15 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println(); }
if (LEARNING) {
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null);
if ( dbsnp == null )
swriters.get(STUDY_NAME).print("no\tno\t");
else
swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant));
}
return 1;
}

View File

@ -6,6 +6,7 @@ import java.util.List;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
*
@ -76,8 +77,17 @@ public class GenotypeUtils {
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
}
public static boolean isHet(AllelicVariant var) {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
List<String> genotype = var.getGenotype();
if ( genotype.size() < 1 )
return false;
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
}
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
* call. All the interface methods are implemented and return consistent values. Use this class when working with
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed