diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index cb41b0061..d2448518e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -2,8 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import java.util.List; import java.util.ArrayList; @@ -53,7 +52,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp truthIndex = UNKNOWN; else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() ) truthIndex = REF; - else if ( isHet(chip) ) + else if ( GenotypeUtils.isHet(chip) ) truthIndex = VAR_HET; else truthIndex = VAR_HOM; @@ -63,7 +62,7 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp callIndex = NO_CALL; else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() ) callIndex = REF; - else if ( isHet(eval) ) + else if ( GenotypeUtils.isHet(eval) ) callIndex = VAR_HET; else callIndex = VAR_HOM; @@ -163,15 +162,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp sb.append("%"); return sb.toString(); } - - private static boolean isHet(AllelicVariant var) { - if ( var instanceof Genotype ) - return ((Genotype)var).isHet(); - - List genotype = var.getGenotype(); - if ( genotype.size() < 1 ) - return false; - - return genotype.get(0).charAt(0) != genotype.get(0).charAt(1); - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java index 21b94590a..65ba0ce19 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECAlleleBalance.java @@ -60,8 +60,9 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends public void compute(char ref, LocusContext context, rodVariants variant) { ReadBackedPileup pileup = new ReadBackedPileup(ref, context); Pair counts = scoreVariant(ref, pileup, variant); - ratio = (double)counts.first / (double)counts.second; - exclude = ratio < lowThreshold || ratio > highThreshold; + int n = counts.first + counts.second; + ratio = counts.first.doubleValue() / (double)n; + exclude = (1.0 - ratio) < lowThreshold || ratio > highThreshold; } public boolean useZeroQualityReads() { return false; } @@ -71,7 +72,7 @@ public class VECAlleleBalance implements VariantExclusionCriterion { //extends } public String getStudyHeader() { - return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorMinorRatio"; + return "AlleleBalance("+lowThreshold+","+highThreshold+")\tMajorRatio"; } public String getStudyInfo() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java index 8af1987f3..89937470c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VECOnOffGenotypeRatio.java @@ -7,13 +7,11 @@ import org.broadinstitute.sting.utils.*; public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // extends RatioFilter { //final private static GenotypeFeatureData.Tail tail = GenotypeFeatureData.Tail.LeftTailed; private boolean exclude; - private double lowThreshold, highThreshold, ratio; + private double threshold, ratio; public void initialize(String arguments) { if (arguments != null && !arguments.isEmpty()) { - String[] argPieces = arguments.split(","); - lowThreshold = Double.valueOf(argPieces[0]); - highThreshold = Double.valueOf(argPieces[1]); + threshold = Double.valueOf(arguments); } } @@ -60,8 +58,9 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext public void compute(char ref, LocusContext context, rodVariants variant) { ReadBackedPileup pileup = new ReadBackedPileup(ref, context); Pair counts = scoreVariant(ref, pileup, variant); - ratio = (double)counts.first / (double)counts.second; - exclude = ratio < lowThreshold || ratio > highThreshold; + int n = counts.first + counts.second; + ratio = counts.first.doubleValue() / (double)n; + exclude = ratio < threshold; } public boolean useZeroQualityReads() { return false; } @@ -71,7 +70,7 @@ public class VECOnOffGenotypeRatio implements VariantExclusionCriterion { // ext } public String getStudyHeader() { - return "OnOffGenotype("+lowThreshold+","+highThreshold+")\tMajorMinorRatio"; + return "OnOffGenotype("+threshold+")\tOnRatio"; } public String getStudyInfo() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java index 459716e37..81e82ba0a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variants/VariantFiltrationWalker.java @@ -2,18 +2,11 @@ package org.broadinstitute.sting.playground.gatk.walkers.variants; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.PackageUtils; -import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.gatk.walkers.variants.*; -import java.io.File; import java.io.FileNotFoundException; import java.io.PrintWriter; import java.util.*; @@ -41,6 +34,8 @@ public class VariantFiltrationWalker extends LocusWalker { private PrintWriter vwriter; private HashMap ewriters; private HashMap swriters; + private final String STUDY_NAME = "study"; + private final String knownSNPDBName = "dbSNP"; private ArrayList requestedFeatures; private ArrayList requestedExclusions; @@ -62,11 +57,17 @@ public class VariantFiltrationWalker extends LocusWalker { vwriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls"); vwriter.println(AlleleFrequencyEstimate.geliHeaderString()); + swriters = new HashMap(); + + if (LEARNING) { + PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + STUDY_NAME); + swriters.put(STUDY_NAME, studyWriter); + studyWriter.print("#Chr\tPosition\t"); + } + requestedFeatures = new ArrayList(); requestedExclusions = new ArrayList(); - swriters = new HashMap(); - // Initialize requested features if (FEATURES != null) { for (String requestedFeatureString : FEATURES) { @@ -81,15 +82,10 @@ public class VariantFiltrationWalker extends LocusWalker { try { IndependentVariantFeature ivf = (IndependentVariantFeature) featureClass.newInstance(); ivf.initialize(requestedFeatureArgs); - - if (LEARNING) { - PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + featureClassName + ".study"); - studyWriter.println(ivf.getStudyHeader()); - - swriters.put(featureClassName, studyWriter); - } - requestedFeatures.add(ivf); + + if (LEARNING) + swriters.get(STUDY_NAME).print(ivf.getStudyHeader() + "\t"); } catch (InstantiationException e) { throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); } catch (IllegalAccessException e) { @@ -116,16 +112,11 @@ public class VariantFiltrationWalker extends LocusWalker { try { VariantExclusionCriterion vec = (VariantExclusionCriterion) exclusionClass.newInstance(); vec.initialize(requestedExclusionArgs); - - if (LEARNING) { - PrintWriter studyWriter = new PrintWriter(VARIANTS_OUT_HEAD + "." + exclusionClassName + ".study"); - studyWriter.println(vec.getStudyHeader()); - - swriters.put(exclusionClassName, studyWriter); - } - requestedExclusions.add(vec); + if (LEARNING) + swriters.get(STUDY_NAME).print(vec.getStudyHeader() + "\t"); + PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls"); writer.println(AlleleFrequencyEstimate.geliHeaderString()); @@ -139,6 +130,8 @@ public class VariantFiltrationWalker extends LocusWalker { } } } + + swriters.get(STUDY_NAME).print("inDbSNP\tinHapMap\tisHet\n"); } catch (FileNotFoundException e) { throw new StingException(String.format("Could not open file(s) for writing")); } @@ -203,12 +196,14 @@ public class VariantFiltrationWalker extends LocusWalker { if (variant != null && (!TRUTH || hapmapSite != null) && BaseUtils.simpleBaseToBaseIndex(ref) != -1) { if (VERBOSE) { out.println("Original:\n " + variant); } + if (LEARNING) { + swriters.get(STUDY_NAME).print(context.getLocation().getContig() + "\t" + context.getLocation().getStart() + "\t"); + } + // Apply features that modify the likelihoods and LOD scores for ( IndependentVariantFeature ivf : requestedFeatures ) { ivf.compute(ref, context); - String featureClassName = rationalizeClassName(ivf.getClass()); - double[] weights = ivf.getLikelihoods(); variant.adjustLikelihoods(weights); @@ -216,10 +211,7 @@ public class VariantFiltrationWalker extends LocusWalker { if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } if (LEARNING) { - PrintWriter swriter = swriters.get(featureClassName); - if (swriter != null) { - swriter.println(ivf.getStudyInfo()); - } + swriters.get(STUDY_NAME).print(ivf.getStudyInfo() + "\t"); } } @@ -240,12 +232,7 @@ public class VariantFiltrationWalker extends LocusWalker { } if (LEARNING) { - PrintWriter swriter = swriters.get(exclusionClassName); - - if (swriter != null) { - swriter.println(vec.getStudyInfo()); - swriter.flush(); - } + swriters.get(STUDY_NAME).print(vec.getStudyInfo() + "\t"); } } @@ -267,6 +254,15 @@ public class VariantFiltrationWalker extends LocusWalker { if (VERBOSE) { out.println(); } + if (LEARNING) { + rodDbSNP dbsnp = (rodDbSNP)tracker.lookup(knownSNPDBName, null); + if ( dbsnp == null ) + swriters.get(STUDY_NAME).print("no\tno\t"); + else + swriters.get(STUDY_NAME).print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); + swriters.get(STUDY_NAME).println(GenotypeUtils.isHet(variant)); + } + return 1; } diff --git a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java index f68a45ef2..bca4e0508 100644 --- a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java +++ b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java @@ -6,6 +6,7 @@ import java.util.List; import org.broadinstitute.sting.gatk.refdata.Genotype; import org.broadinstitute.sting.gatk.refdata.GenotypeList; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; /** Holds useful utility methods and auxiliary default classes for working with Genotype objects * @@ -76,8 +77,17 @@ public class GenotypeUtils { else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList"); } + public static boolean isHet(AllelicVariant var) { + if ( var instanceof Genotype ) + return ((Genotype)var).isHet(); + + List genotype = var.getGenotype(); + if ( genotype.size() < 1 ) + return false; + + return genotype.get(0).charAt(0) != genotype.get(0).charAt(1); + } - /** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel) * call. All the interface methods are implemented and return consistent values. Use this class when working with * genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed