diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 83a16f680..4c3fedb26 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -77,6 +77,13 @@ public class VariantContextUtils { return VariantContextUtils.initializeMatchExps(map); } + public static List initializeMatchExps(ArrayList names, ArrayList exps) { + String[] nameArray = new String[names.size()]; + String[] expArray = new String[exps.size()]; + return initializeMatchExps(names.toArray(nameArray), exps.toArray(expArray)); + } + + /** * Method for creating JexlVCMatchExp from input walker arguments mapping from names to exps. These two arrays contain * the name associated with each JEXL expression. initializeMatchExps will parse each expression and return @@ -191,12 +198,13 @@ public class VariantContextUtils { VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set"); + return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); } -public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, - VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey ) { + public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, + boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, + boolean filteredAreUncalled ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; @@ -211,9 +219,13 @@ public static VariantContext simpleMerge(Collection unsortedVCs, List VCs = new ArrayList(); for (VariantContext vc : prepaddedVCs) { - VCs.add(createVariantContextWithPaddedAlleles(vc,inputRefBase)); + // also a reasonable place to remove filtered calls, if needed + if ( ! filteredAreUncalled || vc.isNotFiltered() ) + VCs.add(createVariantContextWithPaddedAlleles(vc,inputRefBase)); } + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled + return null; // establish the baseline info from the first VC VariantContext first = VCs.get(0); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java index 094a4b594..572923978 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MyHaplotypeScore.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2010, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,15 +12,14 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.walkers.annotator; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java index 011282740..ddc5dda4c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java @@ -38,7 +38,7 @@ import java.util.*; @Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks") public class GenotypeConcordance extends VariantEvaluator { - private static final boolean PRINT_INTERESTING_SITES = false; + private static final boolean PRINT_INTERESTING_SITES = true; protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class); @@ -306,7 +306,8 @@ public class GenotypeConcordance extends VariantEvaluator { final Genotype.Type truth = validation.getGenotype(sample).getType(); sampleStats.incrValue(sample, truth, called); if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) { - if ( PRINT_INTERESTING_SITES ) System.out.printf("%s: HM3 FN => %s%n", group, validation); + if ( PRINT_INTERESTING_SITES && super.getVEWalker().printInterestingSites() ) + System.out.printf("%s: HM3 FN => %s%n", group, validation); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 4b69e4be2..884174879 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts; import org.broadinstitute.sting.playground.utils.report.ReportMarshaller; import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory; import org.broadinstitute.sting.playground.utils.report.templates.ReportFormat; @@ -114,7 +115,7 @@ public class VariantEvalWalker extends RodWalker { // -------------------------------------------------------------------------------------------------------------- @Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false) - protected String[] SELECT_EXPS = {}; + protected ArrayList SELECT_EXPS = new ArrayList(); //protected String[] SELECT_EXPS = {"set == \"Intersection\"", // "set == \"HiSeq.WGS.cleaned.ug.vcf\"", // "set == \"HiSeq.WGS.cleaned.ug.vcf\" || set == \"Intersection\"", @@ -122,7 +123,7 @@ public class VariantEvalWalker extends RodWalker { // "set == \"HiSeq.WGS.raw.OQ.ug.vcf\" || set == \"Intersection\""}; @Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false) - protected String[] SELECT_NAMES = {}; + protected ArrayList SELECT_NAMES = new ArrayList(); //protected String[] SELECT_NAMES = {"Intersection", "x1", "x2", "x3", "x4"}; @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) @@ -194,6 +195,10 @@ public class VariantEvalWalker extends RodWalker { @Argument(shortName="aatUseCodons", fullName="aminoAcidsRepresentedByCodons", doc="for the amino acid table, specifiy that the transitions are represented as codon changes, and not directly amino acid names", required = false) protected boolean aatUseCodons = false; + @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false) + private String TRANCHE_FILENAME = null; + + // -------------------------------------------------------------------------------------------------------------- // // private walker data @@ -279,6 +284,8 @@ public class VariantEvalWalker extends RodWalker { // // -------------------------------------------------------------------------------------------------------------- + public boolean printInterestingSites() { return writer != null; } + public void initialize() { if ( dels ) { ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.NO_VARIATION); @@ -293,6 +300,17 @@ public class VariantEvalWalker extends RodWalker { determineEvalations(); + if ( TRANCHE_FILENAME != null ) { + // we are going to build a few select names automatically from the tranches file + for ( ApplyVariantCuts.Tranche t : ApplyVariantCuts.readTraches(new File(TRANCHE_FILENAME)) ) { + logger.info("Adding select for all variant above the pCut of : " + t); + SELECT_EXPS.add(String.format("QUAL >= %.2f", t.pCut)); + SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr)); + } + } + + logger.info("Selects: " + SELECT_NAMES); + logger.info("Selects: " + SELECT_EXPS); List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index b8a875777..8e6850ad8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -72,14 +72,61 @@ public class ApplyVariantCuts extends RodWalker { final ExpandingArrayList qCuts = new ExpandingArrayList(); final ExpandingArrayList filterName = new ExpandingArrayList(); + public static class Tranche { + public double fdr, pCut, novelTiTv; + public int numNovel; + public String name; + + public Tranche(double fdr, double pCut, double novelTiTv, int numNovel, String name) { + this.fdr = fdr; + this.pCut = pCut; + this.novelTiTv = novelTiTv; + this.numNovel = numNovel; + this.name = name; + } + + public Tranche(final String line) { + final String[] vals = line.split(","); + this.fdr = Double.parseDouble(vals[0]); + this.novelTiTv = Double.parseDouble(vals[1]); + this.pCut = Double.parseDouble(vals[2]); + this.numNovel = Integer.parseInt(vals[3]); + this.name = vals[4]; + } + + public String toString() { + return String.format("[Tranche %s cut = %.3f with %d novels @ %.2f]", name, pCut, numNovel, novelTiTv); + } + } + //--------------------------------------------------------------------------------------------------------------- // // initialize // //--------------------------------------------------------------------------------------------------------------- + public static List readTraches(File f) { + boolean firstLine = true; + List tranches = new ArrayList(); + + try { + for( final String line : new XReadLines(f) ) { + if( ! firstLine ) { + tranches.add(new Tranche(line)); + } + firstLine = false; + } + + return tranches; + } catch( FileNotFoundException e ) { + throw new StingException("Can not find input file: " + f); + } + } + public void initialize() { + // todo -- ryan, it's always best to use a read data structure, I need to read these in. + // todo -- I would have updated your code but there's no integration test to protect me from unexpected effects boolean firstLine = true; try { for( final String line : new XReadLines(new File( TRANCHE_FILENAME )) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 2e73c4fd3..dc6b31cb5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -65,6 +65,9 @@ public class CombineVariants extends RodWalker { @Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false) public boolean printComplexMerges = false; + @Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF", required=false) + public boolean filteredAreUncalled = false; + @Argument(fullName="setKey", shortName="setKey", doc="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted.", required=false) public String SET_KEY = "set"; @@ -120,7 +123,7 @@ public class CombineVariants extends RodWalker { // Need to provide reference bases to simpleMerge starting at current locus Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY); + genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);