From d98503ca50c5c8518ce0b37eb47decff33c9d239 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 15 Mar 2011 21:28:10 +0000 Subject: [PATCH] Removing some debug code from VQSRv2. VariantEval can now be stratified by contig with -ST Contig. New hidden option in CombineVariants for overlapping records to take the info fields from the record with the highest AC (while still updating AC/AN/AF correctly) instead of dropping info fields which aren't exactly the same. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5448 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 12 +++++-- .../varianteval/VariantEvalWalker.java | 10 ++++++ .../varianteval/stratifications/CompRod.java | 3 +- .../varianteval/stratifications/Contig.java | 36 +++++++++++++++++++ .../varianteval/stratifications/CpG.java | 3 +- .../stratifications/Degeneracy.java | 3 +- .../varianteval/stratifications/EvalRod.java | 3 +- .../varianteval/stratifications/Filter.java | 3 +- .../stratifications/FunctionalClass.java | 3 +- .../stratifications/JexlExpression.java | 2 +- .../varianteval/stratifications/Novelty.java | 3 +- .../varianteval/stratifications/Sample.java | 3 +- .../stratifications/VariantStratifier.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 2 +- .../walkers/variantutils/CombineVariants.java | 7 +++- .../ApplyRecalibration.java | 16 +++++---- .../ContrastiveRecalibrator.java | 28 ++++++--------- .../VariantDataManager.java | 4 +-- ...RecalibrationWalkersV2IntegrationTest.java | 7 ++-- 19 files changed, 96 insertions(+), 54 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 961f17a0d..de6b4cfdd 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -411,13 +411,13 @@ public class VariantContextUtils { VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); + return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, - boolean filteredAreUncalled ) { + boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; @@ -452,6 +452,8 @@ public class VariantContextUtils { Set inconsistentAttributes = new HashSet(); String rsID = null; int depth = 0; + int maxAC = -1; + Map maxACAttributes = new TreeMap(); // counting the number of filtered and variant VCs int nFiltered = 0, nVariant = 0; @@ -491,6 +493,10 @@ public class VariantContextUtils { depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY)); if ( rsID == null && vc.hasID() ) rsID = vc.getID(); + if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { + final int ac = Integer.valueOf(vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY)); + if( ac > maxAC ) { maxAC = ac; maxACAttributes = vc.getAttributes(); } + } for ( Map.Entry p : vc.getAttributes().entrySet() ) { String key = p.getKey(); @@ -543,7 +549,7 @@ public class VariantContextUtils { if ( rsID != null ) attributes.put(VariantContext.ID_KEY, rsID); - VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes); + VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? maxACAttributes : attributes) ); // Trim the padded bases of all alleles if necessary merged = VCFCodec.createVariantContextWithTrimmedAlleles(merged); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index eb68dd01a..c4b61b2eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; @@ -381,4 +382,13 @@ public class VariantEvalWalker extends RodWalker implements Tr public Set getCompNames() { return compNames; } public Set getJexlExpressions() { return jexlExpressions; } + + public Set getContigNames() { + final TreeSet contigs = new TreeSet(); + for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) { + contigs.add(r.getSequenceName()); + } + return contigs; + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 22483f224..e4d95c514 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -15,7 +14,7 @@ public class CompRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.compNames = compNames; states = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java new file mode 100755 index 000000000..0ce74f518 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; + +import java.util.ArrayList; +import java.util.Set; + +public class Contig extends VariantStratifier { + // needs to know the variant context + private ArrayList states; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { + states = new ArrayList(); + states.addAll(contigNames); + states.add("all"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + if (eval != null) { + relevantStates.add("all"); + relevantStates.add(eval.getChr()); + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index e9cb77dbb..65608f6b7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -13,7 +12,7 @@ public class CpG extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { states = new ArrayList(); states.add("all"); states.add("CpG"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index eccc737ff..f47905b61 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -16,7 +15,7 @@ public class Degeneracy extends VariantStratifier { private HashMap degeneracies; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { states = new ArrayList(); states.add("1-fold"); states.add("2-fold"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index faf0ab245..6400d612c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -15,7 +14,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.evalNames = evalNames; states = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 316300396..10e628520 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -14,7 +13,7 @@ public class Filter extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { states = new ArrayList(); states.add("called"); states.add("filtered"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 609e604a6..ed9d4d266 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -14,7 +13,7 @@ public class FunctionalClass extends VariantStratifier { private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { states = new ArrayList(); states.add("all"); states.add("silent"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index ce980e3f5..9fa0eca61 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -15,7 +15,7 @@ public class JexlExpression extends VariantStratifier implements StandardStratif private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.jexlExpressions = jexlExpressions; states = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index e55bd496e..ab0fa7f4a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -17,7 +16,7 @@ public class Novelty extends VariantStratifier implements StandardStratification private ArrayList states; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.knownNames = knownNames; states = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index d95ede0b2..940352246 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; @@ -14,7 +13,7 @@ public class Sample extends VariantStratifier { private ArrayList samples; @Override - public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { samples = new ArrayList(); samples.addAll(sampleNames); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 8bfdcc3d1..b2043c21a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -10,7 +10,7 @@ import java.util.ArrayList; import java.util.Set; public abstract class VariantStratifier implements Comparable { - public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames); + public abstract void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames); public ArrayList getAllStates() { return new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 427aa834d..158f7e7d3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -103,7 +103,7 @@ public class VariantEvalUtils { try { VariantStratifier vs = c.newInstance(); - vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification()); + vs.initialize(variantEvalWalker.getJexlExpressions(), variantEvalWalker.getCompNames(), variantEvalWalker.getKnownNames(), variantEvalWalker.getEvalNames(), variantEvalWalker.getSampleNamesForStratification(), variantEvalWalker.getContigNames()); strats.add(vs); } catch (InstantiationException e) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 04dc19066..6ac2e14ea 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; @@ -82,6 +83,10 @@ public class CombineVariants extends RodWalker { @Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.", required=false) public boolean ASSUME_IDENTICAL_SAMPLES = false; + @Hidden + @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) + public boolean MERGE_INFO_WITH_MAX_AC = false; + private List priority = null; public void initialize() { @@ -143,7 +148,7 @@ public class CombineVariants extends RodWalker { mergedVC = VariantContextUtils.masterMerge(vcs, "master"); } else { mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, - genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled); + genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); } //out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 0f43d5575..53c480b96 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration; +import net.sf.samtools.SAMSequenceRecord; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.*; @@ -90,7 +91,7 @@ public class ApplyRecalibration extends RodWalker { //--------------------------------------------------------------------------------------------------------------- public void initialize() { - for ( Tranche t : Tranche.readTranches(TRANCHES_FILE) ) { + for ( final Tranche t : Tranche.readTranches(TRANCHES_FILE) ) { if ( t.fdr >= FDR_FILTER_LEVEL) { tranches.add(t); //statusMsg = "Keeping, above FDR threshold"; @@ -99,7 +100,7 @@ public class ApplyRecalibration extends RodWalker { } Collections.reverse(tranches); // this algorithm wants the tranches ordered from worst to best - for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { + for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { if( d.getName().startsWith("input") ) { inputNames.add(d.getName()); logger.info("Found input variant track with name " + d.getName()); @@ -115,6 +116,7 @@ public class ApplyRecalibration extends RodWalker { // setup the header fields final Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.add(new VCFInfoHeaderLine(ContrastiveRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "log10-scaled probability of variant being true under the trained gaussian mixture model")); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); @@ -166,7 +168,7 @@ public class ApplyRecalibration extends RodWalker { if( !vc.isFiltered() ) { try { for( int i = tranches.size() - 1; i >= 0; i-- ) { - Tranche tranche = tranches.get(i); + final Tranche tranche = tranches.get(i); if( lod >= tranche.minVQSLod ) { if (i == tranches.size() - 1) { filterString = VCFConstants.PASSES_FILTERS_v4; @@ -182,7 +184,7 @@ public class ApplyRecalibration extends RodWalker { } if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { - Set filters = new HashSet(); + final Set filters = new HashSet(); filters.add(filterString); vc = VariantContext.modifyFilters(vc, filters); } @@ -192,14 +194,14 @@ public class ApplyRecalibration extends RodWalker { } final Map attrs = new HashMap(vc.getAttributes()); - if(lod != null) { attrs.put(ContrastiveRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); } - VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), new HashSet(), attrs); - vcfWriter.add( newVC, ref.getBase() ); + + vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() ); } } + return 1; // This value isn't used for anything } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ContrastiveRecalibrator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ContrastiveRecalibrator.java index c935bcaa2..6b4f59948 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ContrastiveRecalibrator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/ContrastiveRecalibrator.java @@ -81,6 +81,10 @@ public class ContrastiveRecalibrator extends RodWalker maxAC ) { - thisVC = vc; - maxAC = ac; - } - } - } - - //for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) { - if( thisVC != null ) { // BUGBUG: filtered? + if( vc != null ) { // BUGBUG: filtered? final VariantDatum datum = new VariantDatum(); - datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), thisVC, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine + datum.annotations = dataManager.decodeAnnotations( ref.getGenomeLocParser(), vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine datum.pos = context.getLocation(); - datum.originalQual = thisVC.getPhredScaledQual(); - datum.isTransition = thisVC.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(thisVC).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 ); - dataManager.parseTrainingSets( tracker, ref, context, thisVC, datum ); + datum.originalQual = vc.getPhredScaledQual(); + datum.isTransition = vc.isSNP() && ( VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0 ); + dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC ); final double priorFactor = QualityUtils.qualToProb( datum.prior ); datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor ); mapList.add( datum ); } - //} + } return mapList; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantDataManager.java index e02efbd03..4cfd4e325 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -170,7 +170,7 @@ public class VariantDataManager { return value; } - public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum ) { + public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) { datum.isKnown = false; datum.atTruthSite = false; datum.atTrainingSite = false; @@ -178,7 +178,7 @@ public class VariantDataManager { for( final TrainingSet trainingSet : trainingSets ) { final Collection vcs = tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, true ); final VariantContext trainVC = ( vcs.size() != 0 ? vcs.iterator().next() : null ); - if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (!trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { + if( trainVC != null && trainVC.isVariant() && !trainVC.isFiltered() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { datum.isKnown = datum.isKnown || trainingSet.isKnown; datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth; datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining; diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersV2IntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersV2IntegrationTest.java index 0c28f054d..bed411f4c 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersV2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersV2IntegrationTest.java @@ -1,12 +1,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantrecalibration; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; import java.util.*; -import java.io.File; public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest { static HashMap clusterFiles = new HashMap(); @@ -29,12 +27,12 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest { VRTest yriTrio = new VRTest("yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "aa211561e6e9b1e323dec4eeaa318088", // tranches "226adf939e90ad20599108e77ad25dae", // recal file - "345a159fd54f5c38500bb20f2de13737"); // cut VCF + "943cb30cad3bc23e4d945b523e1a67cd"); // cut VCF VRTest lowPass = new VRTest("lowpass.N3.chr1.raw.vcf", "f4f2057a8c010206f0f56deff0602452", // tranches "dd36d252a6dc6e3207addddae731dd88", // recal file - "f1ffd3bb1da3863ccb298a3373d6590a"); // cut VCF + "c142f2a3524dcd7a1a16233670aa5772"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -53,6 +51,7 @@ public class VariantRecalibrationWalkersV2IntegrationTest extends WalkerTest { " -B:input,VCF " + params.inVCF + " -L 1:50,000,000-120,000,000" + " -an QD -an MQ -an SB -an AF" + + " --trustAllPolymorphic" + " -recalFile %s" + " -tranchesFile %s", Arrays.asList(params.recalMD5, params.tranchesMD5));