-Output single filtration stats file with input from all filters
-move out isHet test to GenotypeUtils so all can use it git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1369 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d88ea91939
commit
9f1d3aed26
|
|
@ -2,8 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
|||
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -53,7 +52,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
truthIndex = UNKNOWN;
|
||||
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
||||
truthIndex = REF;
|
||||
else if ( isHet(chip) )
|
||||
else if ( GenotypeUtils.isHet(chip) )
|
||||
truthIndex = VAR_HET;
|
||||
else
|
||||
truthIndex = VAR_HOM;
|
||||
|
|
@ -63,7 +62,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
callIndex = NO_CALL;
|
||||
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
||||
callIndex = REF;
|
||||
else if ( isHet(eval) )
|
||||
else if ( GenotypeUtils.isHet(eval) )
|
||||
callIndex = VAR_HET;
|
||||
else
|
||||
callIndex = VAR_HOM;
|
||||
|
|
@ -163,15 +162,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
sb.append("%");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static boolean isHet(AllelicVariant var) {
|
||||
if ( var instanceof Genotype )
|
||||
return ((Genotype)var).isHet();
|
||||
|
||||
List<String> genotype = var.getGenotype();
|
||||
if ( genotype.size() < 1 )
|
||||
return false;
|
||||
|
||||
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,8 +60,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
|||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
ratio = (double)counts.first / (double)counts.second;
|
||||
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||
int n = counts.first + counts.second;
|
||||
ratio = counts.first.doubleValue() / (double)n;
|
||||
exclude = (1.0 - ratio) < lowThreshold || ratio > highThreshold;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
|
@ -71,7 +72,7 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends
|
|||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||
return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorRatio";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
|
|
|
|||
|
|
@ -7,13 +7,11 @@ import org.broadinstitute.sting.utils.*;
|
|||
public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter {
|
||||
//final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed;
|
||||
private boolean exclude;
|
||||
private double lowThreshold, highThreshold, ratio;
|
||||
private double threshold, ratio;
|
||||
|
||||
public void initialize(String arguments) {
|
||||
if (arguments != null && !arguments.isEmpty()) {
|
||||
String[] argPieces = arguments.split(",");
|
||||
lowThreshold = Double.valueOf(argPieces[0]);
|
||||
highThreshold = Double.valueOf(argPieces[1]);
|
||||
threshold = Double.valueOf(arguments);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -60,8 +58,9 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
|
|||
public void compute(char ref, LocusContext context, rodVariants variant) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
Pair<Integer, Integer> counts = scoreVariant(ref, pileup, variant);
|
||||
ratio = (double)counts.first / (double)counts.second;
|
||||
exclude = ratio < lowThreshold || ratio > highThreshold;
|
||||
int n = counts.first + counts.second;
|
||||
ratio = counts.first.doubleValue() / (double)n;
|
||||
exclude = ratio < threshold;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
|
@ -71,7 +70,7 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext
|
|||
}
|
||||
|
||||
public String getStudyHeader() {
|
||||
return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio";
|
||||
return "OnOffGenotype("+threshold+")\tOnRatio";
|
||||
}
|
||||
|
||||
public String getStudyInfo() {
|
||||
|
|
|
|||
|
|
@ -2,18 +2,11 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants;
|
|||
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.PackageUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.variants.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.*;
|
||||
|
|
@ -41,6 +34,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
private PrintWriter vwriter;
|
||||
private HashMap<String, PrintWriter> ewriters;
|
||||
private HashMap<String, PrintWriter> swriters;
|
||||
private final String STUDY_NAME = "study";
|
||||
private final String knownSNPDBName = "dbSNP";
|
||||
|
||||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
||||
|
|
@ -62,11 +57,17 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
|
||||
vwriter.println(AlleleFrequencyEstimate.geliHeaderString());
|
||||
|
||||
swriters = new HashMap<String, PrintWriter>();
|
||||
|
||||
if (LEARNING) {
|
||||
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME);
|
||||
swriters.put(STUDY_NAME, studyWriter);
|
||||
studyWriter.print("#Chr\tPosition\t");
|
||||
}
|
||||
|
||||
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
||||
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
||||
|
||||
swriters = new HashMap<String, PrintWriter>();
|
||||
|
||||
// Initialize requested features
|
||||
if (FEATURES != null) {
|
||||
for (String requestedFeatureString : FEATURES) {
|
||||
|
|
@ -81,15 +82,10 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
try {
|
||||
IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance();
|
||||
ivf.initialize(requestedFeatureArgs);
|
||||
|
||||
if (LEARNING) {
|
||||
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + featureClassName + ".study");
|
||||
studyWriter.println(ivf.getStudyHeader());
|
||||
|
||||
swriters.put(featureClassName, studyWriter);
|
||||
}
|
||||
|
||||
requestedFeatures.add(ivf);
|
||||
|
||||
if (LEARNING)
|
||||
swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t");
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
||||
} catch (IllegalAccessException e) {
|
||||
|
|
@ -116,16 +112,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
try {
|
||||
VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance();
|
||||
vec.initialize(requestedExclusionArgs);
|
||||
|
||||
if (LEARNING) {
|
||||
PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + exclusionClassName + ".study");
|
||||
studyWriter.println(vec.getStudyHeader());
|
||||
|
||||
swriters.put(exclusionClassName, studyWriter);
|
||||
}
|
||||
|
||||
requestedExclusions.add(vec);
|
||||
|
||||
if (LEARNING)
|
||||
swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t");
|
||||
|
||||
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
|
||||
writer.println(AlleleFrequencyEstimate.geliHeaderString());
|
||||
|
||||
|
|
@ -139,6 +130,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
|
|
@ -203,12 +196,14 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref) != -1) {
|
||||
if (VERBOSE) { out.println("Original:\n " + variant); }
|
||||
|
||||
if (LEARNING) {
|
||||
swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t");
|
||||
}
|
||||
|
||||
// Apply features that modify the likelihoods and LOD scores
|
||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||
ivf.compute(ref, context);
|
||||
|
||||
String featureClassName = rationalizeClassName(ivf.getClass());
|
||||
|
||||
double[] weights = ivf.getLikelihoods();
|
||||
|
||||
variant.adjustLikelihoods(weights);
|
||||
|
|
@ -216,10 +211,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||
|
||||
if (LEARNING) {
|
||||
PrintWriter swriter = swriters.get(featureClassName);
|
||||
if (swriter != null) {
|
||||
swriter.println(ivf.getStudyInfo());
|
||||
}
|
||||
swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -240,12 +232,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
if (LEARNING) {
|
||||
PrintWriter swriter = swriters.get(exclusionClassName);
|
||||
|
||||
if (swriter != null) {
|
||||
swriter.println(vec.getStudyInfo());
|
||||
swriter.flush();
|
||||
}
|
||||
swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -267,6 +254,15 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
if (VERBOSE) { out.println(); }
|
||||
|
||||
if (LEARNING) {
|
||||
rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null);
|
||||
if ( dbsnp == null )
|
||||
swriters.get(STUDY_NAME).print("no\tno\t");
|
||||
else
|
||||
swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||
swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import java.util.List;
|
|||
import org.broadinstitute.sting.gatk.refdata.Genotype;
|
||||
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
|
||||
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
|
||||
*
|
||||
|
|
@ -76,8 +77,17 @@ public class GenotypeUtils {
|
|||
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
|
||||
}
|
||||
|
||||
public static boolean isHet(AllelicVariant var) {
|
||||
if ( var instanceof Genotype )
|
||||
return ((Genotype)var).isHet();
|
||||
|
||||
List<String> genotype = var.getGenotype();
|
||||
if ( genotype.size() < 1 )
|
||||
return false;
|
||||
|
||||
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
|
||||
}
|
||||
|
||||
|
||||
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
|
||||
* call. All the interface methods are implemented and return consistent values. Use this class when working with
|
||||
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed
|
||||
|
|
|
|||
Loading…
Reference in New Issue