Support for automated selects for tranches in variant eval -- use -tf to make tranch-specific ve outputs. ApplyVariantCuts with tranche reading functions for general use, along with todo for ryan. CombineVariants now has --filteredAreUncalled and will treat filtered snps in input VCFs are uncalled, and so won't emit -filteredInOther set features
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3908 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9231d13252
commit
ac8048f17b
|
|
@ -77,6 +77,13 @@ public class VariantContextUtils {
|
|||
return VariantContextUtils.initializeMatchExps(map);
|
||||
}
|
||||
|
||||
public static List<JexlVCMatchExp> initializeMatchExps(ArrayList<String> names, ArrayList<String> exps) {
|
||||
String[] nameArray = new String[names.size()];
|
||||
String[] expArray = new String[exps.size()];
|
||||
return initializeMatchExps(names.toArray(nameArray), exps.toArray(expArray));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Method for creating JexlVCMatchExp from input walker arguments mapping from names to exps. These two arrays contain
|
||||
* the name associated with each JEXL expression. initializeMatchExps will parse each expression and return
|
||||
|
|
@ -191,12 +198,13 @@ public class VariantContextUtils {
|
|||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||
|
||||
return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set");
|
||||
return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
|
||||
}
|
||||
|
||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey ) {
|
||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||
boolean filteredAreUncalled ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -211,9 +219,13 @@ public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs,
|
|||
List<VariantContext> VCs = new ArrayList<VariantContext>();
|
||||
|
||||
for (VariantContext vc : prepaddedVCs) {
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc,inputRefBase));
|
||||
// also a reasonable place to remove filtered calls, if needed
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc,inputRefBase));
|
||||
}
|
||||
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
|
||||
return null;
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
VariantContext first = VCs.get(0);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -12,15 +12,14 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ import java.util.*;
|
|||
|
||||
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
|
||||
public class GenotypeConcordance extends VariantEvaluator {
|
||||
private static final boolean PRINT_INTERESTING_SITES = false;
|
||||
private static final boolean PRINT_INTERESTING_SITES = true;
|
||||
|
||||
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
||||
|
||||
|
|
@ -306,7 +306,8 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
final Genotype.Type truth = validation.getGenotype(sample).getType();
|
||||
sampleStats.incrValue(sample, truth, called);
|
||||
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||
if ( PRINT_INTERESTING_SITES ) System.out.printf("%s: HM3 FN => %s%n", group, validation);
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().printInterestingSites() )
|
||||
System.out.printf("%s: HM3 FN => %s%n", group, validation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts;
|
||||
import org.broadinstitute.sting.playground.utils.report.ReportMarshaller;
|
||||
import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory;
|
||||
import org.broadinstitute.sting.playground.utils.report.templates.ReportFormat;
|
||||
|
|
@ -114,7 +115,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false)
|
||||
protected String[] SELECT_EXPS = {};
|
||||
protected ArrayList<String> SELECT_EXPS = new ArrayList<String>();
|
||||
//protected String[] SELECT_EXPS = {"set == \"Intersection\"",
|
||||
// "set == \"HiSeq.WGS.cleaned.ug.vcf\"",
|
||||
// "set == \"HiSeq.WGS.cleaned.ug.vcf\" || set == \"Intersection\"",
|
||||
|
|
@ -122,7 +123,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
// "set == \"HiSeq.WGS.raw.OQ.ug.vcf\" || set == \"Intersection\""};
|
||||
|
||||
@Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false)
|
||||
protected String[] SELECT_NAMES = {};
|
||||
protected ArrayList<String> SELECT_NAMES = new ArrayList<String>();
|
||||
//protected String[] SELECT_NAMES = {"Intersection", "x1", "x2", "x3", "x4"};
|
||||
|
||||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||
|
|
@ -194,6 +195,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="aatUseCodons", fullName="aminoAcidsRepresentedByCodons", doc="for the amino acid table, specifiy that the transitions are represented as codon changes, and not directly amino acid names", required = false)
|
||||
protected boolean aatUseCodons = false;
|
||||
|
||||
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
|
||||
private String TRANCHE_FILENAME = null;
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// private walker data
|
||||
|
|
@ -279,6 +284,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public boolean printInterestingSites() { return writer != null; }
|
||||
|
||||
public void initialize() {
|
||||
if ( dels ) {
|
||||
ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.NO_VARIATION);
|
||||
|
|
@ -293,6 +300,17 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
determineEvalations();
|
||||
|
||||
if ( TRANCHE_FILENAME != null ) {
|
||||
// we are going to build a few select names automatically from the tranches file
|
||||
for ( ApplyVariantCuts.Tranche t : ApplyVariantCuts.readTraches(new File(TRANCHE_FILENAME)) ) {
|
||||
logger.info("Adding select for all variant above the pCut of : " + t);
|
||||
SELECT_EXPS.add(String.format("QUAL >= %.2f", t.pCut));
|
||||
SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr));
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("Selects: " + SELECT_NAMES);
|
||||
logger.info("Selects: " + SELECT_EXPS);
|
||||
List<VariantContextUtils.JexlVCMatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
|
||||
|
||||
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
|
|
|
|||
|
|
@ -72,14 +72,61 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
final ExpandingArrayList<Double> qCuts = new ExpandingArrayList<Double>();
|
||||
final ExpandingArrayList<String> filterName = new ExpandingArrayList<String>();
|
||||
|
||||
public static class Tranche {
|
||||
public double fdr, pCut, novelTiTv;
|
||||
public int numNovel;
|
||||
public String name;
|
||||
|
||||
public Tranche(double fdr, double pCut, double novelTiTv, int numNovel, String name) {
|
||||
this.fdr = fdr;
|
||||
this.pCut = pCut;
|
||||
this.novelTiTv = novelTiTv;
|
||||
this.numNovel = numNovel;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public Tranche(final String line) {
|
||||
final String[] vals = line.split(",");
|
||||
this.fdr = Double.parseDouble(vals[0]);
|
||||
this.novelTiTv = Double.parseDouble(vals[1]);
|
||||
this.pCut = Double.parseDouble(vals[2]);
|
||||
this.numNovel = Integer.parseInt(vals[3]);
|
||||
this.name = vals[4];
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("[Tranche %s cut = %.3f with %d novels @ %.2f]", name, pCut, numNovel, novelTiTv);
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static List<Tranche> readTraches(File f) {
|
||||
boolean firstLine = true;
|
||||
List<Tranche> tranches = new ArrayList<Tranche>();
|
||||
|
||||
try {
|
||||
for( final String line : new XReadLines(f) ) {
|
||||
if( ! firstLine ) {
|
||||
tranches.add(new Tranche(line));
|
||||
}
|
||||
firstLine = false;
|
||||
}
|
||||
|
||||
return tranches;
|
||||
} catch( FileNotFoundException e ) {
|
||||
throw new StingException("Can not find input file: " + f);
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
|
||||
// todo -- ryan, it's always best to use a read data structure, I need to read these in.
|
||||
// todo -- I would have updated your code but there's no integration test to protect me from unexpected effects
|
||||
boolean firstLine = true;
|
||||
try {
|
||||
for( final String line : new XReadLines(new File( TRANCHE_FILENAME )) ) {
|
||||
|
|
|
|||
|
|
@ -65,6 +65,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
|
||||
public boolean printComplexMerges = false;
|
||||
|
||||
@Argument(fullName="filteredAreUncalled", shortName="filteredAreUncalled", doc="If true, then filtered VCFs are treated as uncalled, so that filtered set annotation don't appear in the combined VCF", required=false)
|
||||
public boolean filteredAreUncalled = false;
|
||||
|
||||
@Argument(fullName="setKey", shortName="setKey", doc="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted.", required=false)
|
||||
public String SET_KEY = "set";
|
||||
|
||||
|
|
@ -120,7 +123,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
// Need to provide reference bases to simpleMerge starting at current locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY);
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
|
|
|
|||
Loading…
Reference in New Issue