Removing some debug code from VQSRv2. VariantEval can now be stratified by contig with -ST Contig. New hidden option in CombineVariants for overlapping records to take the info fields from the record with the highest AC (while still updating AC/AN/AF correctly) instead of dropping info fields which aren't exactly the same.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5448 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-03-15 21:28:10 +00:00
parent 4b9b767eb1
commit d98503ca50
19 changed files with 96 additions and 54 deletions

View File

@ -411,13 +411,13 @@ public class VariantContextUtils {
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false);
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
}
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
boolean filteredAreUncalled ) {
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
@ -452,6 +452,8 @@ public class VariantContextUtils {
Set<String> inconsistentAttributes = new HashSet<String>();
String rsID = null;
int depth = 0;
int maxAC = -1;
Map<String, Object> maxACAttributes = new TreeMap<String, Object>();
// counting the number of filtered and variant VCs
int nFiltered = 0, nVariant = 0;
@ -491,6 +493,10 @@ public class VariantContextUtils {
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
if ( rsID == null && vc.hasID() )
rsID = vc.getID();
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
final int ac = Integer.valueOf(vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY));
if( ac > maxAC ) { maxAC = ac; maxACAttributes = vc.getAttributes(); }
}
for ( Map.Entry<String, Object> p : vc.getAttributes().entrySet() ) {
String key = p.getKey();
@ -543,7 +549,7 @@ public class VariantContextUtils {
if ( rsID != null )
attributes.put(VariantContext.ID_KEY, rsID);
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes);
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? maxACAttributes : attributes) );
// Trim the padded bases of all alleles if necessary
merged = VCFCodec.createVariantContextWithTrimmedAlleles(merged);

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
@ -381,4 +382,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public Set<String> getCompNames() { return compNames; }
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
public Set<String> getContigNames() {
final TreeSet<String> contigs = new TreeSet<String>();
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
contigs.add(r.getSequenceName());
}
return contigs;
}
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -15,7 +14,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.compNames = compNames;
states = new ArrayList<String>();

View File

@ -0,0 +1,36 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
import java.util.ArrayList;
import java.util.Set;
public class Contig extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
states = new ArrayList<String>();
states.addAll(contigNames);
states.add("all");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
if (eval != null) {
relevantStates.add("all");
relevantStates.add(eval.getChr());
}
return relevantStates;
}
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -13,7 +12,7 @@ public class CpG extends VariantStratifier {
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
states = new ArrayList<String>();
states.add("all");
states.add("CpG");

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -16,7 +15,7 @@ public class Degeneracy extends VariantStratifier {
private HashMap<String, String> degeneracies;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
states = new ArrayList<String>();
states.add("1-fold");
states.add("2-fold");

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -15,7 +14,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.evalNames = evalNames;
states = new ArrayList<String>();

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -14,7 +13,7 @@ public class Filter extends VariantStratifier {
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
states = new ArrayList<String>();
states.add("called");
states.add("filtered");

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -14,7 +13,7 @@ public class FunctionalClass extends VariantStratifier {
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
states = new ArrayList<String>();
states.add("all");
states.add("silent");

View File

@ -15,7 +15,7 @@ public class JexlExpression extends VariantStratifier implements StandardStratif
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.jexlExpressions = jexlExpressions;
states = new ArrayList<String>();

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -17,7 +16,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
private ArrayList<String> states;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
this.knownNames = knownNames;
states = new ArrayList<String>();

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp;
@ -14,7 +13,7 @@ public class Sample extends VariantStratifier {
private ArrayList<String> samples;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
samples = new ArrayList<String>();
samples.addAll(sampleNames);
}

View File

@ -10,7 +10,7 @@ import java.util.ArrayList;
import java.util.Set;
public abstract class VariantStratifier implements Comparable {
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames);
public abstract void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames);
public ArrayList<String> getAllStates() {
return new ArrayList<String>();

View File

@ -103,7 +103,7 @@ public class VariantEvalUtils {
try {
VariantStratifier vs = c.newInstance();
vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification());
vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification(), variantEvalWalker.getContigNames());
strats.add(vs);
} catch (InstantiationException e) {

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
@ -82,6 +83,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.", required=false)
public boolean ASSUME_IDENTICAL_SAMPLES = false;
@Hidden
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
public boolean MERGE_INFO_WITH_MAX_AC = false;
private List<String> priority = null;
public void initialize() {
@ -143,7 +148,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
} else {
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled);
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
}
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
import net.sf.samtools.SAMSequenceRecord;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.*;
@ -90,7 +91,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
//---------------------------------------------------------------------------------------------------------------
public void initialize() {
for ( Tranche t : Tranche.readTranches(TRANCHES_FILE) ) {
for ( final Tranche t : Tranche.readTranches(TRANCHES_FILE) ) {
if ( t.fdr >= FDR_FILTER_LEVEL) {
tranches.add(t);
//statusMsg = "Keeping, above FDR threshold";
@ -99,7 +100,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
}
Collections.reverse(tranches); // this algorithm wants the tranches ordered from worst to best
for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
if( d.getName().startsWith("input") ) {
inputNames.add(d.getName());
logger.info("Found input variant track with name " + d.getName());
@ -115,6 +116,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
hInfo.add(new VCFInfoHeaderLine(ContrastiveRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "log10-scaled probability of variant being true under the trained gaussian mixture model"));
final TreeSet<String> samples = new TreeSet<String>();
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
@ -166,7 +168,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
if( !vc.isFiltered() ) {
try {
for( int i = tranches.size() - 1; i >= 0; i-- ) {
Tranche tranche = tranches.get(i);
final Tranche tranche = tranches.get(i);
if( lod >= tranche.minVQSLod ) {
if (i == tranches.size() - 1) {
filterString = VCFConstants.PASSES_FILTERS_v4;
@ -182,7 +184,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
}
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
Set<String> filters = new HashSet<String>();
final Set<String> filters = new HashSet<String>();
filters.add(filterString);
vc = VariantContext.modifyFilters(vc, filters);
}
@ -192,14 +194,14 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
}
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
if(lod != null) {
attrs.put(ContrastiveRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod));
}
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), new HashSet<String>(), attrs);
vcfWriter.add( newVC, ref.getBase() );
vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() );
}
}
return 1; // This value isn't used for anything
}

View File

@ -81,6 +81,10 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
@Hidden
@Argument(fullName = "debugFile", shortName = "debugFile", doc = "Print debugging information here", required=false)
private File DEBUG_FILE = null;
@Hidden
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
/////////////////////////////
// Private Member Variables
@ -147,31 +151,19 @@ public class ContrastiveRecalibrator extends RodWalker<ExpandingArrayList<Varian
return mapList;
}
VariantContext thisVC = null;
int maxAC = -1;
for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
if( vc != null ) {
final int ac = vc.getAttributeAsInt("AC");
if( ac > maxAC ) {
thisVC = vc;
maxAC = ac;
}
}
}
//for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) {
if( thisVC != null ) { // BUGBUG: filtered?
if( vc != null ) { // BUGBUG: filtered?
final VariantDatum datum = new VariantDatum();
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), thisVC, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
datum.pos = context.getLocation();
datum.originalQual = thisVC.getPhredScaledQual();
datum.isTransition = thisVC.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(thisVC).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 );
dataManager.parseTrainingSets( tracker, ref, context, thisVC, datum );
datum.originalQual = vc.getPhredScaledQual();
datum.isTransition = vc.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 );
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC );
final double priorFactor = QualityUtils.qualToProb( datum.prior );
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
mapList.add( datum );
}
//}
}
return mapList;
}

View File

@ -170,7 +170,7 @@ public class VariantDataManager {
return value;
}
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum ) {
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
datum.isKnown = false;
datum.atTruthSite = false;
datum.atTrainingSite = false;
@ -178,7 +178,7 @@ public class VariantDataManager {
for( final TrainingSet trainingSet : trainingSets ) {
final Collection<VariantContext> vcs = tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, true );
final VariantContext trainVC = ( vcs.size() != 0 ? vcs.iterator().next() : null );
if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (!trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
datum.isKnown = datum.isKnown || trainingSet.isKnown;
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;

View File

@ -1,12 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import java.util.*;
import java.io.File;
public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
static HashMap<String, String> clusterFiles = new HashMap<String, String>();
@ -29,12 +27,12 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
"aa211561e6e9b1e323dec4eeaa318088", // tranches
"226adf939e90ad20599108e77ad25dae", // recal file
"345a159fd54f5c38500bb20f2de13737"); // cut VCF
"943cb30cad3bc23e4d945b523e1a67cd"); // cut VCF
VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf",
"f4f2057a8c010206f0f56deff0602452", // tranches
"dd36d252a6dc6e3207addddae731dd88", // recal file
"f1ffd3bb1da3863ccb298a3373d6590a"); // cut VCF
"c142f2a3524dcd7a1a16233670aa5772"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -53,6 +51,7 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest {
" -B:input,VCF " + params.inVCF +
" -L 1:50,000,000-120,000,000" +
" -an QD -an MQ -an SB -an AF" +
" --trustAllPolymorphic" +
" -recalFile %s" +
" -tranchesFile %s",
Arrays.asList(params.recalMD5, params.tranchesMD5));