Removing some debug code from VQSRv2. VariantEval can now be stratified by contig with -ST Contig. New hidden option in CombineVariants for overlapping records to take the info fields from the record with the highest AC (while still updating AC/AN/AF correctly) instead of dropping info fields which aren't exactly the same.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5448 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4b9b767eb1
commit
d98503ca50
|
|
@ -411,13 +411,13 @@ public class VariantContextUtils {
|
|||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||
}
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||
boolean filteredAreUncalled ) {
|
||||
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -452,6 +452,8 @@ public class VariantContextUtils {
|
|||
Set<String> inconsistentAttributes = new HashSet<String>();
|
||||
String rsID = null;
|
||||
int depth = 0;
|
||||
int maxAC = -1;
|
||||
Map<String, Object> maxACAttributes = new TreeMap<String, Object>();
|
||||
|
||||
// counting the number of filtered and variant VCs
|
||||
int nFiltered = 0, nVariant = 0;
|
||||
|
|
@ -491,6 +493,10 @@ public class VariantContextUtils {
|
|||
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
|
||||
if ( rsID == null && vc.hasID() )
|
||||
rsID = vc.getID();
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
final int ac = Integer.valueOf(vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY));
|
||||
if( ac > maxAC ) { maxAC = ac; maxACAttributes = vc.getAttributes(); }
|
||||
}
|
||||
|
||||
for ( Map.Entry<String, Object> p : vc.getAttributes().entrySet() ) {
|
||||
String key = p.getKey();
|
||||
|
|
@ -543,7 +549,7 @@ public class VariantContextUtils {
|
|||
if ( rsID != null )
|
||||
attributes.put(VariantContext.ID_KEY, rsID);
|
||||
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes);
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? maxACAttributes : attributes) );
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
merged = VCFCodec.createVariantContextWithTrimmedAlleles(merged);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
|
@ -381,4 +382,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
public Set<String> getCompNames() { return compNames; }
|
||||
|
||||
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
||||
|
||||
public Set<String> getContigNames() {
|
||||
final TreeSet<String> contigs = new TreeSet<String>();
|
||||
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
|
||||
contigs.add(r.getSequenceName());
|
||||
}
|
||||
return contigs;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -15,7 +14,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
this.compNames = compNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
public class Contig extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.addAll(contigNames);
|
||||
states.add("all");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
if (eval != null) {
|
||||
relevantStates.add("all");
|
||||
relevantStates.add(eval.getChr());
|
||||
}
|
||||
|
||||
return relevantStates;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -13,7 +12,7 @@ public class CpG extends VariantStratifier {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("CpG");
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -16,7 +15,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
private HashMap<String, String> degeneracies;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("1-fold");
|
||||
states.add("2-fold");
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -15,7 +14,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
this.evalNames = evalNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -14,7 +13,7 @@ public class Filter extends VariantStratifier {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("called");
|
||||
states.add("filtered");
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -14,7 +13,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("silent");
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ public class JexlExpression extends VariantStratifier implements StandardStratif
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
this.jexlExpressions = jexlExpressions;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -17,7 +16,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
this.knownNames = knownNames;
|
||||
|
||||
states = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
|
||||
|
||||
|
|
@ -14,7 +13,7 @@ public class Sample extends VariantStratifier {
|
|||
private ArrayList<String> samples;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
samples = new ArrayList<String>();
|
||||
samples.addAll(sampleNames);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import java.util.ArrayList;
|
|||
import java.util.Set;
|
||||
|
||||
public abstract class VariantStratifier implements Comparable {
|
||||
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames);
|
||||
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames);
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ public class VariantEvalUtils {
|
|||
|
||||
try {
|
||||
VariantStratifier vs = c.newInstance();
|
||||
vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification());
|
||||
vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification(), variantEvalWalker.getContigNames());
|
||||
|
||||
strats.add(vs);
|
||||
} catch (InstantiationException e) {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
|
|
@ -82,6 +83,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.", required=false)
|
||||
public boolean ASSUME_IDENTICAL_SAMPLES = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
|
||||
public boolean MERGE_INFO_WITH_MAX_AC = false;
|
||||
|
||||
private List<String> priority = null;
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -143,7 +148,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
||||
}
|
||||
|
||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
|
|
@ -90,7 +91,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public void initialize() {
|
||||
for ( Tranche t : Tranche.readTranches(TRANCHES_FILE) ) {
|
||||
for ( final Tranche t : Tranche.readTranches(TRANCHES_FILE) ) {
|
||||
if ( t.fdr >= FDR_FILTER_LEVEL) {
|
||||
tranches.add(t);
|
||||
//statusMsg = "Keeping, above FDR threshold";
|
||||
|
|
@ -99,7 +100,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
Collections.reverse(tranches); // this algorithm wants the tranches ordered from worst to best
|
||||
|
||||
for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
if( d.getName().startsWith("input") ) {
|
||||
inputNames.add(d.getName());
|
||||
logger.info("Found input variant track with name " + d.getName());
|
||||
|
|
@ -115,6 +116,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
// setup the header fields
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
hInfo.add(new VCFInfoHeaderLine(ContrastiveRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "log10-scaled probability of variant being true under the trained gaussian mixture model"));
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
|
||||
|
||||
|
|
@ -166,7 +168,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
if( !vc.isFiltered() ) {
|
||||
try {
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
Tranche tranche = tranches.get(i);
|
||||
final Tranche tranche = tranches.get(i);
|
||||
if( lod >= tranche.minVQSLod ) {
|
||||
if (i == tranches.size() - 1) {
|
||||
filterString = VCFConstants.PASSES_FILTERS_v4;
|
||||
|
|
@ -182,7 +184,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
Set<String> filters = new HashSet<String>();
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
filters.add(filterString);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
|
|
@ -192,14 +194,14 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
if(lod != null) {
|
||||
attrs.put(ContrastiveRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
|
||||
}
|
||||
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), new HashSet<String>(), attrs);
|
||||
vcfWriter.add( newVC, ref.getBase() );
|
||||
|
||||
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
|
||||
}
|
||||
}
|
||||
|
||||
return 1; // This value isn't used for anything
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -81,6 +81,10 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
|
|||
@Hidden
|
||||
@Argument(fullName = "debugFile", shortName = "debugFile", doc = "Print debugging information here", required=false)
|
||||
private File DEBUG_FILE = null;
|
||||
@Hidden
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -147,31 +151,19 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
|
|||
return mapList;
|
||||
}
|
||||
|
||||
VariantContext thisVC = null;
|
||||
int maxAC = -1;
|
||||
for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
|
||||
if( vc != null ) {
|
||||
final int ac = vc.getAttributeAsInt("AC");
|
||||
if( ac > maxAC ) {
|
||||
thisVC = vc;
|
||||
maxAC = ac;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
|
||||
if( thisVC != null ) { // BUGBUG: filtered?
|
||||
if( vc != null ) { // BUGBUG: filtered?
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), thisVC, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.pos = context.getLocation();
|
||||
datum.originalQual = thisVC.getPhredScaledQual();
|
||||
datum.isTransition = thisVC.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(thisVC).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 );
|
||||
dataManager.parseTrainingSets( tracker, ref, context, thisVC, datum );
|
||||
datum.originalQual = vc.getPhredScaledQual();
|
||||
datum.isTransition = vc.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 );
|
||||
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||
final double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
mapList.add( datum );
|
||||
}
|
||||
//}
|
||||
}
|
||||
|
||||
return mapList;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ public class VariantDataManager {
|
|||
return value;
|
||||
}
|
||||
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum ) {
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
|
||||
datum.isKnown = false;
|
||||
datum.atTruthSite = false;
|
||||
datum.atTrainingSite = false;
|
||||
|
|
@ -178,7 +178,7 @@ public class VariantDataManager {
|
|||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
final Collection<VariantContext> vcs = tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, true );
|
||||
final VariantContext trainVC = ( vcs.size() != 0 ? vcs.iterator().next() : null );
|
||||
if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (!trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,10 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
|
||||
static HashMap<String, String> clusterFiles = new HashMap<String, String>();
|
||||
|
|
@ -29,12 +27,12 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
|
|||
VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
||||
"aa211561e6e9b1e323dec4eeaa318088", // tranches
|
||||
"226adf939e90ad20599108e77ad25dae", // recal file
|
||||
"345a159fd54f5c38500bb20f2de13737"); // cut VCF
|
||||
"943cb30cad3bc23e4d945b523e1a67cd"); // cut VCF
|
||||
|
||||
VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf",
|
||||
"f4f2057a8c010206f0f56deff0602452", // tranches
|
||||
"dd36d252a6dc6e3207addddae731dd88", // recal file
|
||||
"f1ffd3bb1da3863ccb298a3373d6590a"); // cut VCF
|
||||
"c142f2a3524dcd7a1a16233670aa5772"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -53,6 +51,7 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
|
|||
" -B:input,VCF " + params.inVCF +
|
||||
" -L 1:50,000,000-120,000,000" +
|
||||
" -an QD -an MQ -an SB -an AF" +
|
||||
" --trustAllPolymorphic" +
|
||||
" -recalFile %s" +
|
||||
" -tranchesFile %s",
|
||||
Arrays.asList(params.recalMD5, params.tranchesMD5));
|
||||
|
|
|
|||
Loading…
Reference in New Issue