Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mauricio Carneiro 2011-08-10 16:57:34 -04:00
commit 46051c36c6
79 changed files with 603 additions and 452 deletions

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/**
* Argument collection that declares a single, optional dbSNP input binding.
*
* The collection exposes one {@code @Input} argument (fullName "dbsnp", shortName "D",
* required=false) typed as a {@code RodBinding<VariantContext>}. Tools can embed this
* class as a field so that the dbSNP argument is declared in one place instead of
* being repeated per tool.
*
* NOTE(review): {@code @Root} is imported from org.simpleframework.xml; presumably it
* marks this collection as serializable by that framework -- confirm.
*
* @author ebanks
* @version 1.0
*/
@Root
public class DbsnpArgumentCollection {
/**
* A dbSNP VCF file.
*
* The argument is optional. The field is initialized to the result of
* {@code RodBinding.makeUnbound(VariantContext.class)} rather than being left
* unassigned, so it already holds that value when no dbSNP file is given.
*/
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
public RodBinding<VariantContext> dbsnp = RodBinding.makeUnbound(VariantContext.class);
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/**
* Argument collection that declares the standard, required variant input binding.
*
* The collection exposes one {@code @Input} argument (fullName "variant", shortName "V",
* required=true) typed as a {@code RodBinding<VariantContext>}. Tools can embed this
* class as a field so that the variant input argument is declared in one place instead
* of being repeated per tool.
*
* NOTE(review): {@code @Root} is imported from org.simpleframework.xml; presumably it
* marks this collection as serializable by that framework -- confirm.
*
* @author ebanks
* @version 1.0
*/
@Root
public class StandardVariantContextInputArgumentCollection {
/**
* The VCF file we are using.
*
* Variants from this file are used by this tool as input.
*
* The argument is declared as required and the field has no initializer, so it is
* {@code null} until something assigns it.
* NOTE(review): presumably the command-line argument parser populates this field
* before any tool code reads it -- confirm.
*/
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
}

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -44,7 +46,7 @@ import java.util.Map;
public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -17,7 +19,7 @@ import java.util.*;
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
Double ratio = annotateSNP(stratifiedContext, vc, g);
if (ratio == null)
return null;

View File

@ -31,6 +31,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -48,7 +50,7 @@ import java.util.Map;
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -50,7 +52,7 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -28,7 +30,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import cern.jet.math.Arithmetic;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -46,7 +48,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.isVariant() || vc.isFiltered() )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
double content = computeGCContent(ref);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", content));

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -27,7 +29,7 @@ public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import net.sf.samtools.SAMRecord;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -54,7 +56,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.';
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
return null;

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -24,7 +26,7 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -20,7 +22,7 @@ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnot
private boolean ANNOTATE_INDELS = true;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -21,7 +23,7 @@ import java.util.*;
*/
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
int run;
if (vc.isMixed()) {

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -20,7 +22,7 @@ import java.util.Map;
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -50,8 +52,8 @@ import java.util.Map;
* To change this template use File | Settings | File Templates.
*/
public class MappingQualityZeroBySample extends GenotypeAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref,
AlignmentContext context, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings,
ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -20,7 +22,7 @@ import java.util.Map;
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -22,7 +24,7 @@ import java.util.Map;
*/
public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -22,7 +24,7 @@ import java.util.Map;
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -25,7 +27,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -58,7 +60,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref,
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -43,7 +45,7 @@ import java.util.Map;
public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( vc.isMonomorphic() || !vc.hasGenotypes() )
return null;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -69,8 +70,11 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
public static final String CODON_NUM_KEY = "CODON_NUM";
public static final String CDS_SIZE_KEY = "CDS_SIZE";
public Map<String, Object> annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
List<Feature> features = tracker.getValues(Feature.class);
public static final String SNPEFF_ROD_NAME = "snpEffFile";
public Map<String, Object> annotate ( RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
RodBinding<SnpEffFeature> snpEffRodBinding = (RodBinding<SnpEffFeature>)rodBindings.get(SNPEFF_ROD_NAME);
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding);
// Add only annotations for one of the most biologically-significant effects as defined in
// the SnpEffConstants class:
@ -83,18 +87,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
return generateAnnotations(mostSignificantEffect);
}
private SnpEffFeature getMostSignificantEffect ( List<Feature> features ) {
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
SnpEffFeature mostSignificantEffect = null;
for ( Feature feature : features ) {
if ( feature instanceof SnpEffFeature ) {
SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
if ( mostSignificantEffect == null ||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
if ( mostSignificantEffect == null ||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
mostSignificantEffect = snpEffFeature;
}
mostSignificantEffect = snpEffFeature;
}
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +20,7 @@ import java.util.Map;
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -29,7 +31,7 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi
private String n454 ="Num454";
private String nSolid = "NumSOLiD";
private String nOther = "NumOther";
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -25,7 +25,10 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -54,11 +57,35 @@ import java.util.*;
@By(DataSource.REFERENCE)
public class VariantAnnotator extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
* A SnpEff output file from which to add annotations.
*
* The INFO field will be annotated with information on the most biologically-significant effect
* listed in the SnpEff output file for each variant.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false)
public RodBinding<SnpEffFeature> snpEffFile;
public RodBinding<SnpEffFeature> snpEffFile = RodBinding.makeUnbound(SnpEffFeature.class);
/**
* A dbSNP VCF file from which to annotate.
*
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
/**
* A comparisons VCF file from which to annotate.
*
* If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are filtered in the comp track will be ignored.
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
*/
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
public RodBinding<VariantContext> comps = RodBinding.makeUnbound(VariantContext.class);
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null;
@ -95,6 +122,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
private Collection<VariantContext> indelBufferContext;
private Map<String, RodBinding<? extends Feature>> rodBindings = new HashMap<String, RodBinding<? extends Feature>>();
private void listAnnotationsAndExit() {
List<Class<? extends InfoFieldAnnotation>> infoAnnotationClasses = new PluginManager<InfoFieldAnnotation>(InfoFieldAnnotation.class).getPlugins();
@ -123,7 +152,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
listAnnotationsAndExit();
// get the list of all sample names from the variant VCF input rod, if applicable
List<String> rodName = Arrays.asList(variants.getName());
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
// add the non-VCF sample from the command-line, if applicable
@ -137,17 +166,19 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
}
initializeRodBindingMap();
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(getToolkit());
engine = new VariantAnnotatorEngine(getToolkit(), rodBindings);
else
engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse);
engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse, rodBindings);
engine.initializeExpressions(expressionsToUse);
// setup the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(engine.getVCFAnnotationDescriptions());
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())) ) {
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) {
if ( isUniqueHeaderLine(line, hInfo) )
hInfo.add(line);
}
@ -160,6 +191,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
}
}
/**
 * Populates {@code rodBindings} with every rod binding this walker declares,
 * keyed by the name each binding reports through {@code getName()}.
 *
 * Registration order is: the input variants, the SnpEff file, dbSNP, then comps.
 *
 * NOTE(review): bindings are registered whether or not the user supplied them;
 * if unbound bindings report the same name, later entries would replace earlier
 * ones in the map -- confirm against RodBinding.
 */
private void initializeRodBindingMap() {
final RodBinding<? extends Feature> variantBinding = variantCollection.variants;
rodBindings.put(variantBinding.getName(), variantBinding);

final RodBinding<? extends Feature> snpEffBinding = snpEffFile;
rodBindings.put(snpEffBinding.getName(), snpEffBinding);

final RodBinding<? extends Feature> dbsnpBinding = dbsnp.dbsnp;
rodBindings.put(dbsnpBinding.getName(), dbsnpBinding);

final RodBinding<? extends Feature> compBinding = comps;
rodBindings.put(compBinding.getName(), compBinding);
}
public static boolean isUniqueHeaderLine(VCFHeaderLine line, Set<VCFHeaderLine> currentSet) {
if ( !(line instanceof VCFCompoundHeaderLine) )
return true;
@ -206,7 +244,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
if ( VCs.size() == 0 )
return 0;

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -55,7 +56,7 @@ public class VariantAnnotatorEngine {
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
private HashMap<String, String> dbAnnotations = new HashMap<String, String>();
private Map<String, RodBinding<? extends Feature>> rodBindings;
private static class VAExpression {
public String fullName, bindingName, fieldName;
@ -72,16 +73,18 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(GenomeAnalysisEngine engine) {
public VariantAnnotatorEngine(GenomeAnalysisEngine engine, Map<String, RodBinding<? extends Feature>> rodBindings) {
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
initializeDBs(engine);
this.rodBindings = rodBindings;
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List<String> annotationGroupsToUse, List<String> annotationsToUse) {
public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List<String> annotationGroupsToUse, List<String> annotationsToUse, Map<String, RodBinding<? extends Feature>> rodBindings) {
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeDBs(engine);
this.rodBindings = rodBindings;
}
// select specific expressions to use
@ -137,7 +140,7 @@ public class VariantAnnotatorEngine {
// go through all the requested info annotationTypes
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc);
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, rodBindings, ref, stratifiedContexts, vc);
if ( annotationsFromCurrentType != null )
infoAnnotations.putAll(annotationsFromCurrentType);
}
@ -202,7 +205,7 @@ public class VariantAnnotatorEngine {
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
Map<String, Object> result = annotation.annotate(tracker, ref, context, vc, genotype);
Map<String, Object> result = annotation.annotate(tracker, rodBindings, ref, context, vc, genotype);
if ( result != null )
genotypeAnnotations.putAll(result);
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -13,7 +15,8 @@ import java.util.Map;
public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation {
// return annotations for the given contexts/genotype split by sample
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g);
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings,
ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g);
// return the descriptions used for the VCF FORMAT meta field
public abstract List<VCFFormatHeaderLine> getDescriptions();

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
@ -13,7 +13,8 @@ import java.util.Map;
public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation {
// return annotations for the given contexts split by sample
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc);
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, Map<String, RodBinding<? extends Feature>> rodBindings,
ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc);
// return the descriptions used for the VCF INFO meta field
public abstract List<VCFInfoHeaderLine> getDescriptions();

View File

@ -25,16 +25,13 @@
package org.broadinstitute.sting.gatk.walkers.beagle;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
@ -52,10 +49,10 @@ import static java.lang.Math.log10;
/**
* Takes files produced by Beagle imputation engine and creates a vcf with modified annotations.
*/
@Requires(value={})
public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
public RodBinding<VariantContext> comp = RodBinding.makeUnbound(VariantContext.class);
@ -111,7 +108,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site"));
}
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName()));
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);
@ -123,7 +120,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
return 0;
GenomeLoc loc = context.getLocation();
VariantContext vc_input = tracker.getFirstValue(variants, loc);
VariantContext vc_input = tracker.getFirstValue(variantCollection.variants, loc);
VariantContext vc_comp = tracker.getFirstValue(comp, loc);

View File

@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.beagle;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -50,10 +50,9 @@ import java.util.*;
/**
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file
*/
@Requires(value={})
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false)
public RodBinding<VariantContext> validation = RodBinding.makeUnbound(VariantContext.class);
@ -98,7 +97,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
public void initialize() {
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName()));
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples )
@ -120,7 +119,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if( tracker != null ) {
GenomeLoc loc = context.getLocation();
VariantContext variant_eval = tracker.getFirstValue(variants, loc);
VariantContext variant_eval = tracker.getFirstValue(variantCollection.variants, loc);
VariantContext validation_eval = tracker.getFirstValue(validation, loc);
if ( goodSite(variant_eval,validation_eval) ) {

View File

@ -32,7 +32,6 @@ import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
@ -55,7 +54,6 @@ import java.util.Set;
* in input variant file. Will additionally hold back a fraction of the sites for evaluation, marking the
* genotypes at those sites as missing, and writing the truth of these sites to a second VCF file
*/
@Requires(value={})
public class VariantsToBeagleUnphasedWalker extends RodWalker<Integer, Integer> {
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;

View File

@ -22,10 +22,11 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broad.tribble.Feature;
import org.broad.tribble.bed.FullBEDFeature;
import org.broad.tribble.bed.BEDFeature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -44,11 +45,11 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
@Output
protected PrintStream out;
@Argument(shortName="comp1", doc="First comparison track name", required=false)
protected String COMP1 = "comp1";
@Input(fullName="comp1", shortName = "comp1", doc="First comparison track name", required=true)
public RodBinding<BEDFeature> compTrack1;
@Argument(shortName="comp2", doc="First comparison track name", required=false)
protected String COMP2 = "comp2";
@Input(fullName="comp2", shortName = "comp2", doc="Second comparison track name", required=true)
public RodBinding<BEDFeature> compTrack2;
@Argument(shortName="printState", doc="If provided, prints sites satisfying this state pair", required=false)
protected String printState = null;
@ -78,8 +79,8 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
// --------------------------------------------------------------------------------------------------------------
public List<CallableLociWalker.CallableBaseState> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker != null ) {
CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, COMP1);
CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, COMP2);
CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, compTrack1);
CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, compTrack2);
if ( printState != null && comp1.getState() == printState1 && comp2.getState() == printState2 ) {
out.printf("%s %s %s %s%n", comp1.getLocation(), comp1.getState(), comp2.getLocation(), comp2.getState());
@ -91,14 +92,14 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
}
}
private CallableLociWalker.CallableBaseState getCallableBaseState(RefMetaDataTracker tracker, String track) {
private CallableLociWalker.CallableBaseState getCallableBaseState(RefMetaDataTracker tracker, RodBinding<BEDFeature> rodBinding) {
//System.out.printf("tracker %s%n", tracker);
List<Feature> bindings = tracker.getValues(Feature.class, track);
if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) {
throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", track));
List<BEDFeature> bindings = tracker.getValues(rodBinding);
if ( bindings.size() != 1 ) {
throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", rodBinding.getName()));
}
FullBEDFeature bed = (FullBEDFeature)bindings.get(0);
BEDFeature bed = bindings.get(0);
GenomeLoc loc = getToolkit().getGenomeLocParser().createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd());
CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName());
return new CallableLociWalker.CallableBaseState(getToolkit().getGenomeLocParser(),loc, state);
@ -128,7 +129,7 @@ public class CompareCallableLociWalker extends RodWalker<List<CallableLociWalker
public void onTraversalDone(long[][] result) {
for ( CallableLociWalker.CalledState state1 : CallableLociWalker.CalledState.values() ) {
for ( CallableLociWalker.CalledState state2 : CallableLociWalker.CalledState.values() ) {
out.printf("%s %s %s %s %d%n", COMP1, COMP2, state1, state2, result[state1.ordinal()][state2.ordinal()]);
out.printf("%s %s %s %s %d%n", compTrack1.getName(), compTrack2.getName(), state1, state2, result[state1.ordinal()][state2.ordinal()]);
}
}
}

View File

@ -29,7 +29,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import java.io.File;
@ -131,7 +130,6 @@ import java.util.List;
* @author Mark DePristo
* @since 7/4/11
*/
@Requires(value={})
public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
/**
* Writes out a file of the DiffEngine format:

View File

@ -26,13 +26,10 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -49,12 +46,11 @@ import java.util.*;
/**
* Filters variant calls using a number of user-selectable, parameterizable criteria.
*/
@Requires(value={})
@Reference(window=@Window(start=-50,stop=50))
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Input(fullName="mask", doc="Input ROD mask", required=false)
public RodBinding<Feature> mask = RodBinding.makeUnbound(Feature.class);
@ -100,7 +96,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
private void initializeVcfWriter() {
final List<String> inputNames = Arrays.asList(variants.getName());
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -152,7 +148,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
// is there a SNP mask present?
boolean hasMask = tracker.hasValues(mask);

View File

@ -276,13 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
if ( elt.isReducedRead() ) {
// reduced read representation
byte qual = elt.getReducedQual();
for ( int i = 0; i < elt.getReducedCount(); i++ ) {
add(obsBase, qual, (byte)0, (byte)0);
}
return elt.getQual();
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
} else {
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0) : 0;
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
}
}
@ -309,9 +307,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
* @param qual1
* @param obsBase2
* @param qual2 can be 0, indicating no second base was observed for this fragment
* @param nObs The number of times this quad of values was seen. Generally 1, but reduced reads
* can have nObs > 1 for synthetic reads
* @return
*/
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) {
// TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine
// TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future.
// TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here.
@ -332,19 +332,17 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
for ( DiploidGenotype g : DiploidGenotype.values() ) {
double likelihood = likelihoods[g.ordinal()];
//if ( VERBOSE ) {
// System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n",
// observedBase, g, qualityScore, pow(10,likelihood) * 100, likelihood);
//}
log10Likelihoods[g.ordinal()] += likelihood;
log10Posteriors[g.ordinal()] += likelihood;
log10Likelihoods[g.ordinal()] += likelihood * nObs;
log10Posteriors[g.ordinal()] += likelihood * nObs;
}
return 1;
}
/**
 * Convenience overload that records the given pair of observations exactly once.
 *
 * Delegates to the five-argument {@code add} with an observation count of 1.
 *
 * @param obsBase1 first observed base
 * @param qual1    quality of the first observed base
 * @param obsBase2 second observed base
 * @param qual2    quality of the second observed base
 * @return whatever the five-argument {@code add} returns
 */
private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) {
final int singleObservation = 1;
return add(obsBase1, qual1, obsBase2, qual2, singleObservation);
}
// -------------------------------------------------------------------------------------
//
// Dealing with the cache routines

View File

@ -49,7 +49,6 @@ import java.util.TreeSet;
* the name 'allele' so we know which alternate allele to use at each site.
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@Requires(value={})
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.READS)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)

View File

@ -25,7 +25,9 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@ -51,6 +53,9 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
@ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public List<RodBinding<VariantContext>> variants;
// control the output
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
@ -63,13 +68,8 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
public void initialize() {
for ( ReferenceOrderedDataSource d : getToolkit().getRodDataSources() ) {
if ( d.getName().startsWith("variant") )
trackNames.add(d.getName());
}
if ( trackNames.size() == 0 )
throw new UserException("At least one track bound to a name beginning with 'variant' must be provided.");
for ( RodBinding<VariantContext> rb : variants )
trackNames.add(rb.getName());
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
@ -93,11 +93,7 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
if ( tracker == null )
return null;
List<VariantContext> VCs = new ArrayList<VariantContext>();
for ( String name : trackNames ) {
VariantContext vc = tracker.getFirstValue(VariantContext.class, name, context.getLocation());
VCs.add(vc);
}
List<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
VariantContext mergedVC = mergeVCsWithGLs(VCs);
if ( mergedVC == null )

View File

@ -25,10 +25,13 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@ -59,6 +62,13 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
/**
* A dbSNP VCF file from which to annotate.
*
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
*/
@ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
// control the output
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
@ -130,7 +140,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
// TODO: Fill in the final argument with actual RodBinding map
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse, new HashMap<String, RodBinding<? extends Feature>>());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
// initialize the header

View File

@ -30,16 +30,12 @@ import net.sf.samtools.*;
import net.sf.samtools.util.RuntimeIOException;
import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -86,6 +82,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
USE_SW
}
@Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false)
public List<RodBinding<VariantContext>> known = Collections.emptyList();
@Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
protected String intervalsFile = null;
@ -159,21 +158,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
protected boolean CHECKEARLY = false;
// DEPRECATED
@Deprecated
@Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", doc="This argument is no longer used.", required=false)
protected boolean DEPRECATED_SORT_IN_COORDINATE_ORDER = false;
@Deprecated
@Argument(fullName="realignReadsWithBadMates", doc="This argument is no longer used.", required=false)
protected boolean DEPRECATED_REALIGN_MATES = false;
@Deprecated
@Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", doc="This argument is no longer used. See --consensusDeterminationModel instead.", required=false)
protected boolean DEPRECATED_KNOWNS_ONLY = false;
// DEBUGGING OPTIONS FOLLOW
@Hidden
@ -558,8 +542,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( indelRodsSeen.contains(rod) )
continue;
indelRodsSeen.add(rod);
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rod))
knownIndelsToTry.add(VariantContextAdaptors.toVariantContext("", rod, ref));
if ( rod instanceof VariantContext )
knownIndelsToTry.add((VariantContext)rod);
}
}
}

View File

@ -26,7 +26,9 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
@ -46,6 +48,8 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string.
@ -56,9 +60,13 @@ import java.util.ArrayList;
@By(DataSource.REFERENCE)
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
@Output
protected PrintStream out;
@Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false)
public List<RodBinding<VariantContext>> known = Collections.emptyList();
// mismatch/entropy/SNP arguments
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false)
protected int windowSize = 10;
@ -110,7 +118,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
// look at the rods for indels or SNPs
if ( tracker != null ) {
for ( VariantContext vc : tracker.getValues(VariantContext.class) ) {
for ( VariantContext vc : tracker.getValues(known) ) {
switch ( vc.getType() ) {
case INDEL:
hasIndel = true;

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -24,6 +26,12 @@ public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
@Output
protected VCFWriter vcfWriter = null;
@Input(fullName="pbt", shortName = "pbt", doc="Input VCF truth file", required=true)
public RodBinding<VariantContext> pbtTrack;
@Input(fullName="rbp", shortName = "rbp", doc="Input VCF truth file", required=true)
public RodBinding<VariantContext> rbpTrack;
private Map<String, Genotype> pbtCache = new HashMap<String, Genotype>();
private Map<String, Genotype> rbpCache = new HashMap<String, Genotype>();
@ -31,7 +39,7 @@ public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
public void initialize() {
ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add("pbt");
rodNames.add(pbtTrack.getName());
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
@ -44,8 +52,8 @@ public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (tracker != null) {
Collection<VariantContext> pbts = tracker.getValues(VariantContext.class, "pbt", ref.getLocus());
Collection<VariantContext> rbps = tracker.getValues(VariantContext.class, "rbp", ref.getLocus());
Collection<VariantContext> pbts = tracker.getValues(pbtTrack, ref.getLocus());
Collection<VariantContext> rbps = tracker.getValues(rbpTrack, ref.getLocus());
VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null;
VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null;

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -31,13 +33,16 @@ import java.util.*;
* begin.
*/
public class PhaseByTransmission extends RodWalker<Integer, Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Argument(shortName="f", fullName="familySpec", required=true, doc="Patterns for the family structure (usage: mom+dad=child). Specify several trios by supplying this argument many times and/or a file containing many patterns.")
public ArrayList<String> familySpecs = null;
@Output
protected VCFWriter vcfWriter = null;
private final String ROD_NAME = "variant";
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
private final String SOURCE_NAME = "PhaseByTransmission";
@ -102,7 +107,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
trios = getFamilySpecsFromCommandLineInput(familySpecs);
ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add(ROD_NAME);
rodNames.add(variantCollection.variants.getName());
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
@ -289,7 +294,7 @@ public class PhaseByTransmission extends RodWalker<Integer, Integer> {
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (tracker != null) {
VariantContext vc = tracker.getFirstValue(VariantContext.class, ROD_NAME, context.getLocation());
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
Map<String, Genotype> genotypeMap = vc.getGenotypes();

View File

@ -24,12 +24,12 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.DisjointSet;
@ -67,8 +67,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
*
* All heterozygous variants found in this VCF file will be phased, where possible
*/
@Input(fullName="variant", shortName = "V", doc="Phase variants from this VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(doc = "File to which variants should be written", required = true)
protected VCFWriter writer = null;
@ -175,8 +175,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
hInfo.add(new VCFInfoHeaderLine(PHASING_INCONSISTENT_KEY, 0, VCFHeaderLineType.Flag, "Are the reads significantly haplotype-inconsistent?"));
// todo -- fix samplesToPhase
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
Set<String> samples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(variants.getName()).getGenotypeSamples() : samplesToPhase);
String trackName = variantCollection.variants.getName();
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<String> samples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
writer.writeHeader(new VCFHeader(hInfo, samples));
}
@ -207,7 +208,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
PhasingStats phaseStats = new PhasingStats();
List<VariantContext> unprocessedList = new LinkedList<VariantContext>();
for (VariantContext vc : tracker.getValues(variants, context.getLocation())) {
for (VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase);
if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) {

View File

@ -25,7 +25,9 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@ -54,6 +56,9 @@ public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
// the divider to use in some of the text output
private static final String DIVIDER = ",";
@Input(fullName="eval", shortName = "eval", doc="Input VCF eval file", required=true)
public List<RodBinding<VariantContext>> eval;
@Output
public PrintStream out;
@ -108,7 +113,7 @@ public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
// if the argument was set, check for equivalence
if (allRecordsVariantContextEquivalent && tracker != null) {
Collection<VariantContext> col = tracker.getValues(VariantContext.class);
Collection<VariantContext> col = tracker.getValues(eval);
VariantContext con = null;
for (VariantContext contextInList : col)
if (con == null) con = contextInList;

View File

@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
@ -56,6 +56,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
/////////////////////////////
// Inputs
/////////////////////////////
/**
* The raw input variants to be recalibrated.
*/
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
public List<RodBinding<VariantContext>> input;
@Input(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
private File RECAL_FILE;
@Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true)
@ -101,17 +106,8 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
}
Collections.reverse(tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) to worst (highest truth sensitivity)
for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
if( d.getName().startsWith("input") ) {
inputNames.add(d.getName());
logger.info("Found input variant track with name " + d.getName());
} else {
logger.info("Not evaluating ROD binding " + d.getName());
}
}
if( inputNames.size() == 0 ) {
throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
for( final RodBinding rod : input ) {
inputNames.add( rod.getName() );
}
if( IGNORE_INPUT_FILTERS != null ) {
@ -168,7 +164,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
return 1;
}
for( VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) {
for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
String filterString = null;

View File

@ -1,71 +0,0 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Tags;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/12/11
*/
/**
 * Records the role a single named track plays, as declared by the tags
 * attached to its binding.
 *
 * Each boolean role is enabled only when the corresponding tag is present and
 * its value is exactly the string "true". The "prior" tag, when present, is
 * parsed with {@link Double#parseDouble(String)}, so a malformed value
 * surfaces as a NumberFormatException from the constructor.
 */
public class TrainingSet {

    protected final static Logger logger = Logger.getLogger(TrainingSet.class);

    public String name;
    public boolean isKnown = false;        // tag "known"
    public boolean isTraining = false;     // tag "training"
    public boolean isAntiTraining = false; // tag "bad"
    public boolean isTruth = false;        // tag "truth"
    public boolean isConsensus = false;    // tag "consensus"
    public double prior = 0.0;             // tag "prior"; logged with a "Q" prefix, presumably a phred-scaled value

    /**
     * Builds the description of a track and logs what was detected.
     *
     * @param name the name of the track
     * @param tags the tags supplied with the track; may be null, in which case
     *             every role stays false and the prior stays 0.0
     */
    public TrainingSet( final String name, final Tags tags ) {
        this.name = name;

        // Without tags there is nothing to interpret; the field defaults stand.
        if( tags != null ) {
            isKnown = flagIsTrue( tags, "known" );
            isTraining = flagIsTrue( tags, "training" );
            isAntiTraining = flagIsTrue( tags, "bad" );
            isTruth = flagIsTrue( tags, "truth" );
            isConsensus = flagIsTrue( tags, "consensus" );
            if( tags.containsKey("prior") ) {
                prior = Double.parseDouble( tags.getValue("prior") );
            }
        }

        // Tell the user how the track was classified. Consensus takes precedence
        // over bad-sites; a track that is neither gets the full property summary.
        final String report;
        if( isConsensus ) {
            report = String.format( "Found consensus track: %s", this.name);
        } else if( isAntiTraining ) {
            report = String.format( "Found bad sites training track: %s", this.name);
        } else {
            report = String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", this.name, isKnown, isTraining, isTruth, prior);
        }
        logger.info( report );
    }

    /**
     * @return true only if the given key is present in the tags and maps to exactly "true"
     */
    private static boolean flagIsTrue( final Tags tags, final String key ) {
        return tags.containsKey(key) && tags.getValue(key).equals("true");
    }
}

View File

@ -26,10 +26,10 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
/**
@ -51,7 +52,6 @@ public class VariantDataManager {
private final double[] meanVector;
private final double[] varianceVector; // this is really the standard deviation
public final ArrayList<String> annotationKeys;
private final ExpandingArrayList<TrainingSet> trainingSets;
private final VariantRecalibratorArgumentCollection VRAC;
protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
@ -62,7 +62,6 @@ public class VariantDataManager {
this.VRAC = VRAC;
meanVector = new double[this.annotationKeys.size()];
varianceVector = new double[this.annotationKeys.size()];
trainingSets = new ExpandingArrayList<TrainingSet>();
}
public void setData( final ExpandingArrayList<VariantDatum> data ) {
@ -105,31 +104,6 @@ public class VariantDataManager {
}
}
public void addTrainingSet( final TrainingSet trainingSet ) {
trainingSets.add( trainingSet );
}
public boolean checkHasTrainingSet() {
for( final TrainingSet trainingSet : trainingSets ) {
if( trainingSet.isTraining ) { return true; }
}
return false;
}
public boolean checkHasTruthSet() {
for( final TrainingSet trainingSet : trainingSets ) {
if( trainingSet.isTruth ) { return true; }
}
return false;
}
public boolean checkHasKnownSet() {
for( final TrainingSet trainingSet : trainingSets ) {
if( trainingSet.isKnown ) { return true; }
}
return false;
}
public ExpandingArrayList<VariantDatum> getTrainingData() {
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
for( final VariantDatum datum : data ) {
@ -240,13 +214,14 @@ public class VariantDataManager {
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
}
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
// normalize QD by event length for indel case
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
if (eventLength > 0) // sanity check
value /= (double)eventLength;
}
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
// normalize QD by event length for indel case
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
if (eventLength > 0) { // sanity check
value /= (double)eventLength;
}
}
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
@ -257,30 +232,44 @@ public class VariantDataManager {
return value;
}
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap<String, Double> rodToPriorMap,
final List<RodBinding<VariantContext>> training, final List<RodBinding<VariantContext>> truth, final List<RodBinding<VariantContext>> known, final List<RodBinding<VariantContext>> badSites) {
datum.isKnown = false;
datum.atTruthSite = false;
datum.atTrainingSite = false;
datum.atAntiTrainingSite = false;
datum.prior = 2.0;
datum.consensusCount = 0;
for( final TrainingSet trainingSet : trainingSets ) {
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name, ref.getLocus()) ) {
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
datum.isKnown = datum.isKnown || trainingSet.isKnown;
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
datum.prior = Math.max( datum.prior, trainingSet.prior );
datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 );
for( final RodBinding<VariantContext> rod : training ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.atTrainingSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : truth ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.atTruthSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : known ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.isKnown = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : badSites ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( trainVC != null ) {
datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
datum.atAntiTrainingSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
}
@ -292,4 +281,10 @@ public class VariantDataManager {
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
}
}
/**
 * Decides whether a record from a resource track counts as a usable match
 * for the variant currently being evaluated.
 *
 * The resource record must be non-null, unfiltered and variant. Its type must
 * be compatible with the evaluated variant: SNP with SNP, or indel/mixed with
 * indel/mixed. Finally, unless the caller trusts all records to be
 * polymorphic, a record carrying genotypes must itself be polymorphic.
 *
 * @param evalVC                the variant being evaluated
 * @param trainVC               the candidate record from the resource track; may be null
 * @param TRUST_ALL_POLYMORPHIC when true, the polymorphism check on trainVC is skipped
 * @return true if trainVC passes all of the checks above
 */
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
    // Basic sanity of the resource record itself.
    if( trainVC == null || !trainVC.isNotFiltered() || !trainVC.isVariant() ) {
        return false;
    }

    // The two records have to describe the same class of event.
    final boolean compatibleTypes;
    if( evalVC.isSNP() && trainVC.isSNP() ) {
        compatibleTypes = true;
    } else {
        compatibleTypes = (evalVC.isIndel() || evalVC.isMixed()) && (trainVC.isIndel() || trainVC.isMixed());
    }
    if( !compatibleTypes ) {
        return false;
    }

    // Sites-only records (no genotypes) are accepted as-is; genotyped records must be polymorphic.
    return TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic();
}
}

View File

@ -25,13 +25,9 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
@ -57,11 +53,51 @@ import java.util.*;
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> implements TreeReducible<ExpandingArrayList<VariantDatum>> {
public static final String VQS_LOD_KEY = "VQSLOD";
public static final String CULPRIT_KEY = "culprit";
public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
@ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
/////////////////////////////
// Inputs
/////////////////////////////
/**
* The raw input variants to be recalibrated.
*/
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
public List<RodBinding<VariantContext>> input;
/**
* A list of training variants used to train the Gaussian mixture model.
*
* Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
*/
@Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true)
public List<RodBinding<VariantContext>> training;
/**
* A list of true variants to be used when deciding the truth sensitivity cut of the final callset.
*
* When deciding where to set the cutoff in VQSLOD, sensitivity to these truth sites is used.
* Typically one might want to say I dropped my threshold until I got back 99% of HapMap sites, for example.
*/
@Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true)
public List<RodBinding<VariantContext>> truth;
/**
* A list of known variants to be used for metric comparison purposes.
*
* The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
* The output metrics are stratified by known status in order to aid in comparisons with other call sets.
*/
@Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false)
public List<RodBinding<VariantContext>> known = Collections.emptyList();
/**
* A list of known bad variants used to supplement training the negative model.
*
* In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list
* with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example).
*/
@Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false)
public List<RodBinding<VariantContext>> badSites = Collections.emptyList();
/////////////////////////////
// Outputs
/////////////////////////////
@ -96,9 +132,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Hidden
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
@Hidden
@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
protected Boolean PERFORM_PROJECT_CONSENSUS = false;
//@Hidden
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
/////////////////////////////
// Private Member Variables
@ -106,8 +142,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private VariantDataManager dataManager;
private PrintStream tranchesStream;
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
private final Set<String> inputNames = new HashSet<String>();
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
private final HashMap<String, Double> rodToPriorMap = new HashMap<String, Double>();
//---------------------------------------------------------------------------------------------------------------
//
@ -123,31 +159,24 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
ignoreInputFilterSet.addAll( Arrays.asList(IGNORE_INPUT_FILTERS) );
}
for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
if( d.getName().toLowerCase().startsWith("input") ) {
inputNames.add(d.getName());
logger.info( "Found input variant track with name " + d.getName() );
} else {
dataManager.addTrainingSet( new TrainingSet(d.getName(), d.getTags()) );
}
}
if( !dataManager.checkHasTrainingSet() ) {
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
if( !dataManager.checkHasTruthSet() ) {
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
if( inputNames.size() == 0 ) {
throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
}
try {
tranchesStream = new PrintStream(TRANCHES_FILE);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
}
final ArrayList<RodBinding<VariantContext>> allInputBindings = new ArrayList<RodBinding<VariantContext>>();
allInputBindings.addAll(truth);
allInputBindings.addAll(training);
allInputBindings.addAll(known);
allInputBindings.addAll(badSites);
for( final RodBinding<VariantContext> rod : allInputBindings ) {
try {
rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) );
} catch( NumberFormatException e ) {
throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf");
}
}
}
//---------------------------------------------------------------------------------------------------------------
@ -163,10 +192,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
return mapList;
}
for( final VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) {
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
final VariantDatum datum = new VariantDatum();
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
dataManager.decodeAnnotations( datum, vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
datum.contig = vc.getChr();
datum.start = vc.getStart();
@ -176,12 +207,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
// Loop through the training data sets and, if they overlap this locus, update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC );
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites );
double priorFactor = QualityUtils.qualToProb( datum.prior );
if( PERFORM_PROJECT_CONSENSUS ) {
final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
}
//if( PERFORM_PROJECT_CONSENSUS ) {
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
//}
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
mapList.add( datum );

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.SampleUtils;
@ -64,8 +63,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
* are technically order dependent. It is strongly recommended to provide explicit names when
* a rod priority list is provided.
*/
@Input(fullName = "variant", shortName = "V", doc="The VCF files to merge together", required=true)
public List<RodBinding<VariantContext>> variantsToMerge;
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public List<RodBinding<VariantContext>> variants;
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null;
@ -157,7 +156,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// get all of the vcf rods at this locus
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getValues(variantsToMerge, context.getLocation());
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
if ( sitesOnlyVCF ) {
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);

View File

@ -24,9 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -46,10 +46,10 @@ import java.util.Set;
* Filters a lifted-over VCF file for ref bases that have been changed.
*/
@Reference(window=@Window(start=0,stop=100))
@Requires(value={})
public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
private static final int MAX_VARIANT_SIZE = 100;
@ -59,10 +59,11 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
private long failedLocs = 0, totalLocs = 0;
/**
 * Builds the output VCF header and writes it through {@code writer}.
 *
 * The sample set and the per-track VCF headers are both looked up by the name of
 * the input variant track. If no header is registered under that name, the output
 * header is constructed with {@code null} metadata and just the sample set.
 */
public void initialize() {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
// The track name is resolved once and reused for every lookup below.
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(variants.getName()) ? vcfHeaders.get(variants.getName()).getMetaData() : null, samples);
// Fall back to null metadata when the track has no VCF header of its own.
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}
@ -89,7 +90,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : VCs )
filterAndWrite(ref.getBases(), vc);

View File

@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -48,10 +48,10 @@ import java.util.*;
* Left-aligns indels from a variants file.
*/
@Reference(window=@Window(start=-200,stop=200))
@Requires(value={})
public class LeftAlignVariants extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter baseWriter = null;
@ -59,10 +59,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
private SortingVCFWriter writer;
public void initialize() {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<VCFHeaderLine> headerLines = vcfHeaders.get(variants.getName()).getMetaData();
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaData();
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
writer = new SortingVCFWriter(baseWriter, 200);
@ -72,7 +73,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
int changedSites = 0;
for ( VariantContext vc : VCs )

View File

@ -29,14 +29,11 @@ import net.sf.picard.liftover.LiftOver;
import net.sf.picard.util.Interval;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -50,10 +47,10 @@ import java.util.*;
/**
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
*/
@Requires(value={})
public class LiftoverVariants extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(doc="File to which variants should be written",required=true)
protected File file = null;
@ -88,12 +85,13 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference");
}
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName()));
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
if ( vcfHeaders.containsKey(variants.getName()) )
metaData.addAll(vcfHeaders.get(variants.getName()).getMetaData());
if ( vcfHeaders.containsKey(trackName) )
metaData.addAll(vcfHeaders.get(trackName).getMetaData());
if ( RECORD_ORIGINAL_LOCATION ) {
metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record"));
metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record"));
@ -146,7 +144,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : VCs )
convertAndWrite(vc, ref);

View File

@ -24,15 +24,12 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -45,10 +42,10 @@ import java.util.*;
/**
* Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results.
*/
@Requires(value={})
public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true)
protected VCFWriter vcfWriter1 = null;
@ -72,7 +69,7 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
iFraction = (int)(fraction * 1000.0);
// setup the header info
final List<String> inputNames = Arrays.asList(variants.getName());
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
@ -94,7 +91,7 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : vcs ) {
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
if ( random < iFraction )

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -37,7 +38,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -53,7 +53,6 @@ import java.util.*;
* Takes a VCF file, selects variants based on the sample(s) in which they were found and/or on various annotation criteria,
* recomputes the value of certain annotations based on the new sample set, and outputs a new VCF with the results.
*/
@Requires(value={})
public class SelectVariants extends RodWalker<Integer, Integer> {
/**
* The VCF file we are selecting variants from.
@ -61,8 +60,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
* Variants from this file are sent through the filtering and modifying routines as directed
* by the arguments to SelectVariants, and finally are emitted.
*/
@Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
* If provided, we will filter out variants that are "discordant" to the variants in this file
@ -194,7 +192,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
*/
public void initialize() {
// Get list of samples to include in the output
List<String> rodNames = Arrays.asList(variants.getName());
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
@ -318,7 +316,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
if ( vcs == null || vcs.size() == 0) {
return 0;

View File

@ -28,13 +28,11 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.*;
@ -53,11 +51,13 @@ import java.util.Set;
* Validates a variants file.
*/
@Reference(window=@Window(start=0,stop=100))
@Requires(value={})
public class ValidateVariants extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public enum ValidationType {
ALL, REF, IDS, ALLELES, CHR_COUNTS
@ -78,14 +78,14 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
private File file = null;
/**
 * Records, in {@code file}, a {@link File} built from the source string of the
 * input variant binding.
 */
public void initialize() {
file = new File(variants.getSource());
file = new File(variantCollection.variants.getSource());
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return 0;
Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : VCs )
validate(vc, tracker, ref);
@ -141,8 +141,8 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// get the RS IDs
Set<String> rsIDs = null;
if ( tracker.hasValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
List<Feature> dbsnpList = tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME);
if ( tracker.hasValues(dbsnp.dbsnp) ) {
List<VariantContext> dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus());
rsIDs = new HashSet<String>();
for ( Object d : dbsnpList ) {
if (d instanceof DbSNPFeature )

View File

@ -36,7 +36,6 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -47,7 +46,6 @@ import java.util.*;
* Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes)
*/
@Reference(window=@Window(start=0,stop=40))
@Requires(value={})
public class VariantValidationAssessor extends RodWalker<VariantContext,Integer> {
@Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;

View File

@ -24,16 +24,13 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -45,11 +42,10 @@ import java.util.*;
/**
* Emits specific fields as dictated by the user from one or more VCF files.
*/
@Requires(value={})
public class VariantsToTable extends RodWalker<Integer, Integer> {
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
@ -138,7 +134,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
return 0;
if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) {
for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) {
for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) {
List<String> vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA);
out.println(Utils.join("\t", vals));

View File

@ -56,7 +56,6 @@ import java.util.*;
/**
* Converts variants from other file formats to VCF format.
*/
@Requires(value={})
@Reference(window=@Window(start=-40,stop=40))
public class VariantsToVCF extends RodWalker<Integer, Integer> {

View File

@ -27,11 +27,16 @@ package org.broadinstitute.sting.utils.codecs.snpEff;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
@ -68,7 +73,7 @@ import java.io.IOException;
*
* @author David Roazen
*/
public class SnpEffCodec implements FeatureCodec {
public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
public static final String FIELD_DELIMITER_PATTERN = "\\t";
@ -255,4 +260,16 @@ public class SnpEffCodec implements FeatureCodec {
}
}
}
/**
 * Reports whether this codec can decode the given file, determined by attempting
 * to parse the file's header with {@code readHeader}.
 *
 * The underlying file stream is always closed before returning, so probing many
 * candidate files does not leak file descriptors.
 *
 * @param potentialInput the file to probe
 * @return true if the header was read without any exception being thrown;
 *         false if opening the file or reading the header failed for any reason
 */
public boolean canDecode ( final File potentialInput ) {
    FileInputStream stream = null;
    try {
        stream = new FileInputStream(potentialInput);
        LineReader reader = new AsciiLineReader(stream);
        readHeader(reader);
        return true;
    }
    catch ( Exception e ) {
        // Any failure (missing file, unreadable or malformed header) means "not ours".
        return false;
    }
    finally {
        if ( stream != null ) {
            try {
                stream.close();
            }
            catch ( IOException ignored ) {
                // A failed close cannot change the answer already determined above.
            }
        }
    }
}
}

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk;
import org.testng.Assert;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
@ -64,7 +63,6 @@ public class WalkerManagerUnitTest {
}
@Hidden
@Requires(value={})
class UninstantiableWalker extends Walker<Integer,Long> {
// Private constructor will generate uninstantiable message
private UninstantiableWalker() {}

View File

@ -129,8 +129,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testSnpEffAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant:VCF " +
validationDataLocation + "1000G.exomes.vcf --snpEffFile:SnpEff " + validationDataLocation +
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
1,
Arrays.asList("c08648a078368c80530bff004b3157f1")

View File

@ -30,7 +30,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class CompareCallableLociWalkerIntegrationTest extends WalkerTest {
final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci -B:comp1,Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed -B:comp2,Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s";
final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci --comp1:Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed --comp2:Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s";
@Test
public void testCompareCallableLociWalker1() {

View File

@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
" -glm BOTH" +
" -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
" -L chr1:1-50,000,000" +
" -dbsnp:VCF " + b36dbSNP129 +
" --dbsnp:VCF " + b36dbSNP129 +
" -o /dev/null",
0,
new ArrayList<String>(0));
@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
" -glm BOTH" +
" -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
" -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
" -dbsnp:vcf " + b36dbSNP129 +
" --dbsnp:vcf " + b36dbSNP129 +
" -o /dev/null",
0,
new ArrayList<String>(0));
@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest {
" -glm BOTH" +
" -L chr1:1-50,000,000" +
" -nt 10" +
" -dbsnp:vcf " + b36dbSNP129 +
" --dbsnp:vcf " + b36dbSNP129 +
" -o /dev/null",
0,
new ArrayList<String>(0));

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -28,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
executeTest("test realigner defaults", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
baseCommand + "-B:indels,vcf " + knownIndels,
baseCommand + "-known " + knownIndels,
1,
Arrays.asList(base_md5_with_SW_or_VCF));
executeTest("test realigner defaults with VCF", spec2);
@ -37,7 +36,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testKnownsOnly() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels,
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels,
1,
Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888"));
executeTest("realigner known indels only from VCF", spec1);
@ -46,7 +45,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testUseSW() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel USE_SW -B:indels,vcf " + knownIndels,
baseCommand + "--consensusDeterminationModel USE_SW -known " + knownIndels,
1,
Arrays.asList(base_md5_with_SW_or_VCF));
executeTest("realigner use SW from VCF", spec1);

View File

@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest {
" -LOD 5" +
" -maxConsensuses 100" +
" -greedy 100" +
" -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -o /dev/null" +
" -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
" -L chr1:1-5,650,000" +
@ -45,7 +45,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest {
" -LOD 5" +
" -maxConsensuses 100" +
" -greedy 100" +
" -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -o /dev/null" +
" -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
" -L chr1:1-150,000,000" +

View File

@ -17,13 +17,13 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
executeTest("test standard", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -B:dbsnp,vcf " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
1,
Arrays.asList("0367d39a122c8ac0899fb868a82ef728"));
executeTest("test dbsnp", spec2);
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s",
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s",
1,
Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96"));
executeTest("test rods only", spec3);

View File

@ -12,7 +12,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest {
WalkerTestSpec spec1 = new WalkerTestSpec(
"-R " + hg18Reference +
" -T RealignerTargetCreator" +
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" +
" -L chr1:1-50,000,000" +
" -o /dev/null",
@ -23,7 +23,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
"-R " + hg18Reference +
" -T RealignerTargetCreator" +
" -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" +
" -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" +
" -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" +
" -o /dev/null",

View File

@ -16,8 +16,8 @@ public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest {
buildCommandLine(
"-T MergeAndMatchHaplotypes",
"-R " + b37KGReference,
"-B:pbt,VCF " + fundamentalTestPBTVCF,
"-B:rbp,VCF " + fundamentalTestRBPVCF,
"--pbt " + fundamentalTestPBTVCF,
"--rbp " + fundamentalTestRBPVCF,
"-o %s"
),
1,

View File

@ -16,7 +16,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"-B:variant,VCF " + fundamentalTestVCF,
"--variant " + fundamentalTestVCF,
"-f NA12892+NA12891=NA12878",
"-o %s"
),

View File

@ -41,11 +41,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" -B:dbsnp,VCF,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -training:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -training:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -truth:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -T VariantRecalibrator" +
" -B:input,VCF " + params.inVCF +
" -input " + params.inVCF +
" -L 20:1,000,000-40,000,000" +
" -an QD -an HaplotypeScore -an HRun" +
" -percentBad 0.07" +
@ -64,7 +66,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -T ApplyRecalibration" +
" -L 20:12,000,000-30,000,000" +
" -NO_HEADER" +
" -B:input,VCF " + params.inVCF +
" -input " + params.inVCF +
" -o %s" +
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),

View File

@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant:VCF " + testFile + " -o %s -NO_HEADER",
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
);
@ -68,7 +68,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 -B:variant,VCF " + testFile + " -o %s -NO_HEADER",
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("")
);

View File

@ -95,7 +95,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
@Test
public void testBadID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad.vcf", "IDS") + " -B:dbsnp,vcf " + b36dbSNP129,
baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
0,
UserException.MalformedFile.class
);

View File

@ -15,8 +15,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
" -R " + b36KGReference;
private static String root = cmdRoot +
" -L 1:1-1,000,000 -B:dbsnp,vcf " + b36dbSNP129 +
" -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
" -L 1:1-1,000,000 -V " + b36dbSNP129;
private static final class VCITTest extends TestDataProvider {
String args, md5;
@ -30,15 +29,15 @@ public class VariantContextIntegrationTest extends WalkerTest {
@DataProvider(name = "VCITTestData")
public Object[][] createVCITTestData() {
new VCITTest("--printPerLocus", "");
new VCITTest("--printPerLocus --onlyContextsOfType SNP", "");
new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "");
new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "");
new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "");
new VCITTest("--printPerLocus --takeFirstOnly", "");
new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "");
new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "");
new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "");
new VCITTest("--printPerLocus", "e9d0f1fe80659bb55b40aa6c3a2e921e");
new VCITTest("--printPerLocus --onlyContextsOfType SNP", "0e620db3e45771df42c54a9c0ae4a29f");
new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "b725c204fefe3814644d50e7c20f9dfe");
new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "3ccc33f496a1718df55722d11cc14334");
new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc");
new VCITTest("--printPerLocus --takeFirstOnly", "3a45561da042b2b44b6a679744f16103");
new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "4746f269ecc377103f83eb61cc162c39");
new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "2749e3fae458650a85a2317e346dc44c");
new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9bd48c2a40813023e29ffaa23d59d382");
return VCITTest.getTests(VCITTest.class);
}
@ -58,7 +57,7 @@ public class VariantContextIntegrationTest extends WalkerTest {
public void testToVCF() {
// this really just tests that we are seeing the same number of objects over all of chr1
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -V:VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s",
2, // two output files: one for -o and one for --outputVCF
Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63"));
executeTest("testToVCF", spec);

View File

@ -44,7 +44,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
this.intervalsString = this.originalGATK.intervalsString
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",")
this.out = this.originalOutput
this.assumeIdenticalSamples = true