diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R index 18d33054e..a24d269c9 100644 --- a/public/R/queueJobReport.R +++ b/public/R/queueJobReport.R @@ -12,7 +12,9 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" + #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA } @@ -113,11 +115,22 @@ plotGroup <- function(groupTable) { textplot(as.data.frame(sum), show.rownames=F) title(paste("Job summary for", name, "itemizing each iteration"), cex=3) + # histogram of job times by groupAnnotations + if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { + # todo -- how do we group by annotations? + p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram() + p <- p + xlab("runtime in seconds") + ylab("No. of jobs") + p <- p + opts(title=paste("Job runtime histogram for", name)) + print(p) + } + # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + if ( dim(sub)[1] > 1 ) { + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + } } # print out some useful basic information @@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T) plotJobsGantt(gatkReportData, F) plotProgressByTime(gatkReportData) for ( group in gatkReportData ) { - plotGroup(group) + plotGroup(group) } if ( ! is.na(outputPDF) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 59fb4aa9e..3b9e35311 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar if (!( walker instanceof TreeReducible )) throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers"); - traversalEngine.startTimers(); ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 65ff27497..09ab4bd44 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -44,7 +44,6 @@ public class LinearMicroScheduler extends MicroScheduler { * @param shardStrategy A strategy for sharding the data. */ public Object execute(Walker walker, ShardStrategy shardStrategy) { - traversalEngine.startTimers(); walker.initialize(); Accumulator accumulator = Accumulator.create(engine,walker); @@ -54,6 +53,7 @@ public class LinearMicroScheduler extends MicroScheduler { if ( done || shard == null ) // we ran out of shards that aren't owned break; + traversalEngine.startTimersIfNecessary(); if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 6136bd68d..2b6488ada 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -57,6 +57,7 @@ public class ShardTraverser implements Callable { public Object call() { try { + traversalEngine.startTimersIfNecessary(); long startTime = System.currentTimeMillis(); Object accumulator = walker.reduceInit(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index bf490e28c..7bf518fd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; -import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.dbsnp.OldDbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -93,27 +93,27 @@ public class VariantContextAdaptors { // -------------------------------------------------------------------------------------------------------------- private static class DBSnpAdaptor implements VCAdaptor { - private static boolean isSNP(DbSNPFeature feature) { + private static boolean isSNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact"); } - private static boolean isMNP(DbSNPFeature feature) { + private static boolean isMNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range"); } - private static boolean isInsertion(DbSNPFeature feature) { + private static boolean isInsertion(OldDbSNPFeature feature) { return feature.getVariantType().contains("insertion"); } - private static boolean isDeletion(DbSNPFeature feature) { + private static boolean isDeletion(OldDbSNPFeature feature) { return feature.getVariantType().contains("deletion"); } - private static boolean isIndel(DbSNPFeature feature) { + private static boolean isIndel(OldDbSNPFeature feature) { return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature); } - public static boolean isComplexIndel(DbSNPFeature feature) { + public static boolean isComplexIndel(OldDbSNPFeature feature) { return feature.getVariantType().contains("in-del"); } @@ -125,7 +125,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlternateAlleleList(DbSNPFeature feature) { + public static List getAlternateAlleleList(OldDbSNPFeature feature) { List ret = new ArrayList(); for (String allele : getAlleleList(feature)) if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele); @@ -139,7 +139,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlleleList(DbSNPFeature feature) { + public static List getAlleleList(OldDbSNPFeature feature) { List alleleList = new ArrayList(); // add ref first if ( feature.getStrand() == Strand.POSITIVE ) @@ -156,14 +156,14 @@ public class VariantContextAdaptors { /** * Converts non-VCF formatted dbSNP records to VariantContext. - * @return DbSNPFeature. + * @return OldDbSNPFeature. */ @Override - public Class getAdaptableFeatureType() { return DbSNPFeature.class; } + public Class getAdaptableFeatureType() { return OldDbSNPFeature.class; } @Override public VariantContext convert(String name, Object input, ReferenceContext ref) { - DbSNPFeature dbsnp = (DbSNPFeature)input; + OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) return null; Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 89a179d0e..27fd173cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -115,12 +115,13 @@ public abstract class TraversalEngine,Provide LinkedList history = new LinkedList(); /** We use the SimpleTimer to time our run */ - private SimpleTimer timer = new SimpleTimer("Traversal"); + private SimpleTimer timer = null; // How long can we go without printing some progress info? private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000; private int printProgressCheckCounter = 0; private long lastProgressPrintTime = -1; // When was the last time we printed progress log? + private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0; private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0; @@ -209,11 +210,16 @@ public abstract class TraversalEngine,Provide } } /** - * Should be called to indicate that we're going to process records and the timer should start ticking + * Should be called to indicate that we're going to process records and the timer should start ticking. This + * function should be called right before any traversal work is done, to avoid counting setup costs in the + * processing costs and inflating the estimated runtime. */ - public void startTimers() { - timer.start(); - lastProgressPrintTime = timer.currentTime(); + public void startTimersIfNecessary() { + if ( timer == null ) { + timer = new SimpleTimer("Traversal"); + timer.start(); + lastProgressPrintTime = timer.currentTime(); + } } /** @@ -224,7 +230,8 @@ public abstract class TraversalEngine,Provide * @return true if the maximum interval (in millisecs) has passed since the last printing */ private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) { - return (curTime - lastPrintTime) > printFreq; + long elapsed = curTime - lastPrintTime; + return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS; } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 60f0fcb0a..880dba5d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -79,24 +79,45 @@ public class BeagleOutputToVCFWalker extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + /** + * If this argument is present, the original allele frequencies and counts from this vcf are added as annotations ACH,AFH and ANH. at each record present in this vcf + */ @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) public RodBinding comp; + + /** + * This required argument is used to annotate each site in the vcf INFO field with R2 annotation. Will be NaN if Beagle determined there are no variant samples. + */ @Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true) public RodBinding beagleR2; + /** + * These values will populate the GL field for each sample and contain the posterior probability of each genotype given the data after phasing and imputation. + */ @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true) public RodBinding beagleProbs; + /** + * By default, all genotypes will be marked in the VCF as "phased", using the "|" separator after Beagle. + */ @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true) public RodBinding beaglePhased; @Output(doc="VCF File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; + /** + * If this argument is absent, and if Beagle determines that there is no sample in a site that has a variant genotype, the site will be marked as filtered (Default behavior). + * If the argument is present, the site won't be marked as filtered under this condition even if there are no variant genotypes. + */ @Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false) public boolean DONT_FILTER_MONOMORPHIC_SITES = false; + /** + * Value between 0 and 1. If the probability of getting a genotype correctly (based on the posterior genotype probabilities and the actual genotype) is below this threshold, + * a genotype will be substitute by a no-call. + */ @Argument(fullName="no" + "call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) private double noCallThreshold = 0.0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 07793fd7b..87695077d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -112,6 +112,9 @@ public class ProduceBeagleInputWalker extends RodWalker { @Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false) VCFWriter bootstrapVCFOutput = null; + /** + * If sample gender is known, this flag should be set to true to ensure that Beagle treats male Chr X properly. + */ @Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false) public boolean CHECK_IS_MALE_ON_CHR_X = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 7fe16c9df..3a18fe610 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -105,10 +105,19 @@ public class DepthOfCoverageWalker extends LocusWalker out; + /** + * Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin. + */ @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) int start = 1; + /** + * Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin. + */ @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) int stop = 500; + /** + * Sets the number of bins for granular binning + */ @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) int nBins = 499; @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) @@ -119,28 +128,59 @@ public class DepthOfCoverageWalker extends LocusWalker partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); + /** + * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. + */ @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) boolean ignoreDeletionSites = false; + + /** + * Path to the RefSeq file for use in aggregating coverage statistics over genes + */ @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) File refSeqGeneList = null; + /** + * The format of the output file + */ @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) String outputFormat = "rtable"; + /** + * A coverage threshold for summarizing (e.g. % bases >= CT for each sample) + */ @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) int[] coverageThresholds = {15}; @@ -963,4 +1003,4 @@ class CoveragePartitioner { public Map> getIdentifiersByType() { return identifiersByType; } -} \ No newline at end of file +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 06455df6d..b1332bdf9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; import java.util.*; @@ -239,7 +236,8 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()); + vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase()); + } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 546bbe1a6..e5ad3106d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -77,6 +77,11 @@ import java.util.*; * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. + * + * If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you, + * please email asivache at broadinstitute dot org and he will gladly explain everything in more detail. + * + * */ @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index f9bd019ea..01e8cd321 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -93,20 +93,30 @@ import java.util.List; */ @Requires(value={DataSource.REFERENCE}) public class ValidationAmplicons extends RodWalker { + /** + * A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe) + */ @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+ "intervals surrounding the probe sites amplicons should be designed for", required=true) RodBinding probeIntervals; - + /** + * A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq. + */ @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true) RodBinding validateAlleles; - + /** + * A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site. + */ @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true) RodBinding maskAlleles; - @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; + /** + * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. + * This changes the size of the k-mer used for alignment. + */ @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) int virtualPrimerSize = 20; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 613a31ed3..0d09b7033 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; @@ -24,6 +25,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -122,9 +124,6 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) protected Boolean NO_STANDARD_STRATIFICATIONS = false; - @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) - protected Set typesToUse = null; - /** * See the -list argument to view available modules. */ @@ -150,9 +149,6 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; - @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false) - private String TRANCHE_FILENAME = null; - @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) private File ancestralAlignmentsFile = null; @@ -233,16 +229,6 @@ public class VariantEvalWalker extends RodWalker implements Tr jexlExpressions.add(sjexl); } - // Add select expressions for anything in the tranches file - if ( TRANCHE_FILENAME != null ) { - // we are going to build a few select names automatically from the tranches file - for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) { - logger.info("Adding select for all variant above the pCut of : " + t); - SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod)); - SELECT_NAMES.add(String.format("TS-%.2f", t.ts)); - } - } - // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); @@ -317,9 +303,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // find the comp final VariantContext comp = findMatchingComp(eval, compSet); - HashMap> stateMap = new HashMap>(); + HashMap> stateMap = new HashMap>(); for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); + List states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); stateMap.put(vs, states); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 2ea64c49c..5ccacac37 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean evalIsGood = eval != null && eval.isVariant(); + boolean evalIsGood = eval != null && eval.isPolymorphic(); boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); if (compIsGood) nCompVariants++; // count the number of comp events diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 59ef3d992..2913c97a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval { // So in order to maintain consistency with the previous implementation (and the intention of the original author), I've // added in a proxy check for monomorphic status here. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call. - if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) { + if ( vc1.isMonomorphic() ) { nRefLoci++; } else { switch (vc1.getType()) { case NO_VARIATION: + // shouldn't get here break; case SNP: nVariantLoci++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 35fffd815..ffe7c185f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator { public int getComparisonOrder() { return 1; } // need only the evals public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( ! vc1.isBiallelic() && vc1.isIndel() ) { - //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); - return vc1.toString(); // biallelic sites are output - } - if ( vc1.isIndel() ) { + if ( vc1.isIndel() && vc1.isPolymorphic() ) { + + if ( ! vc1.isBiallelic() ) { + //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); + return vc1.toString(); // biallelic sites are output + } + + // only count simple insertions/deletions, not complex indels if ( vc1.isSimpleInsertion() ) { indelHistogram.update(vc1.getAlternateAllele(0).length()); } else if ( vc1.isSimpleDeletion() ) { indelHistogram.update(-vc1.getReference().length()); - } else { - throw new ReviewedStingException("Indel type that is not insertion or deletion."); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index fc347339d..f70e6c2de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (eval != null ) { + if (eval != null && eval.isPolymorphic()) { if ( indelStats == null ) { indelStats = new IndelStats(eval); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index d466645ea..203c15a85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval } } - if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) { + if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) { metrics.incrValue(eval); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index ec43cbd55..e51623c3c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator { } public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) { + if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) { return null; //no interesting sites } - if (vc.hasGenotypes()) { + //this maps allele to a count + ConcurrentMap alleleCounts = new ConcurrentHashMap(); - //this maps allele to a count - ConcurrentMap alleleCounts = new ConcurrentHashMap(); + int numHetsHere = 0; + float numGenosHere = 0; + int numIndsHere = 0; - int numHetsHere = 0; - float numGenosHere = 0; - int numIndsHere = 0; + for (Genotype genotype : vc.getGenotypes().values()) { + numIndsHere++; + if (!genotype.isNoCall()) { + //increment stats for heterozygosity + if (genotype.isHet()) { + numHetsHere++; + } - for (Genotype genotype : vc.getGenotypes().values()) { - numIndsHere++; - if (!genotype.isNoCall()) { - //increment stats for heterozygosity - if (genotype.isHet()) { - numHetsHere++; - } + numGenosHere++; + //increment stats for pairwise mismatches - numGenosHere++; - //increment stats for pairwise mismatches - - for (Allele allele : genotype.getAlleles()) { - if (allele.isNonNull() && allele.isCalled()) { - String alleleString = allele.toString(); - alleleCounts.putIfAbsent(alleleString, 0); - alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); - } + for (Allele allele : genotype.getAlleles()) { + if (allele.isNonNull() && allele.isCalled()) { + String alleleString = allele.toString(); + alleleCounts.putIfAbsent(alleleString, 0); + alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); } } } - if (numGenosHere > 0) { - //only if have one called genotype at least - this.numSites++; + } + if (numGenosHere > 0) { + //only if have one called genotype at least + this.numSites++; - this.totalHet += numHetsHere / numGenosHere; + this.totalHet += numHetsHere / numGenosHere; - //compute based on num sites - float harmonicFactor = 0; - for (int i = 1; i <= numIndsHere; i++) { - harmonicFactor += 1.0 / i; - } - this.thetaRegionNumSites += 1.0 / harmonicFactor; + //compute based on num sites + float harmonicFactor = 0; + for (int i = 1; i <= numIndsHere; i++) { + harmonicFactor += 1.0 / i; + } + this.thetaRegionNumSites += 1.0 / harmonicFactor; - //now compute pairwise mismatches - float numPairwise = 0; - float numDiffs = 0; - for (String allele1 : alleleCounts.keySet()) { - int allele1Count = alleleCounts.get(allele1); + //now compute pairwise mismatches + float numPairwise = 0; + float numDiffs = 0; + for (String allele1 : alleleCounts.keySet()) { + int allele1Count = alleleCounts.get(allele1); - for (String allele2 : alleleCounts.keySet()) { - if (allele1.compareTo(allele2) < 0) { - continue; - } - if (allele1 .compareTo(allele2) == 0) { - numPairwise += allele1Count * (allele1Count - 1) * .5; + for (String allele2 : alleleCounts.keySet()) { + if (allele1.compareTo(allele2) < 0) { + continue; + } + if (allele1 .compareTo(allele2) == 0) { + numPairwise += allele1Count * (allele1Count - 1) * .5; - } - else { - int allele2Count = alleleCounts.get(allele2); - numPairwise += allele1Count * allele2Count; - numDiffs += allele1Count * allele2Count; - } + } + else { + int allele2Count = alleleCounts.get(allele2); + numPairwise += allele1Count * allele2Count; + numDiffs += allele1Count * allele2Count; } } + } - if (numPairwise > 0) { - this.totalAvgDiffs += numDiffs / numPairwise; - } + if (numPairwise > 0) { + this.totalAvgDiffs += numDiffs / numPairwise; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index be957abd7..1feb37e01 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } public void updateTiTv(VariantContext vc, boolean updateStandard) { - if (vc != null && vc.isSNP() && vc.isBiallelic()) { + if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) { if (VariantContextUtils.isTransition(vc)) { if (updateStandard) nTiInComp++; else nTi++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 9c331b577..307b4f684 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { public SiteStatus calcSiteStatus(VariantContext vc) { if ( vc == null ) return SiteStatus.NO_CALL; if ( vc.isFiltered() ) return SiteStatus.FILTERED; - if ( ! vc.isVariant() ) return SiteStatus.MONO; + if ( vc.isMonomorphic() ) return SiteStatus.MONO; + if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { int ac = 0; @@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { else ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY); return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; - } else if ( vc.hasGenotypes() ) { - return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO; } else { return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do //return SiteStatus.NO_CALL; // we can't figure out what to do diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index b6ad55b18..263227938 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final String interesting = null; - if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) + if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( titvStats == null ) { titvStats = new TiTvStats(); } titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 5cdea4e00..3cc22cc52 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; +/** + * Stratifies the eval RODs by the allele count of the alternate allele + * + * Looks at the AC value in the INFO field, and uses that value if present. If absent, + * computes the AC from the genotypes themselves. For no AC can be computed, 0 is used. + */ public class AlleleCount extends VariantStratifier { - // needs to know the variant context - private ArrayList states = new ArrayList(); - @Override public void initialize() { List> evals = getVariantEvalWalker().getEvals(); @@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier { getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(1); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 96d9f30ec..3d2dda651 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the eval RODs by the allele frequency of the alternate allele + * + * Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires + * that AF be present in every ROD, otherwise this stratification throws an exception + */ public class AlleleFrequency extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9f4123589..1f31ebfa7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; + +/** + * Required stratification grouping output by each comp ROD + */ public class CompRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); for ( RodBinding rod : getVariantEvalWalker().getComps() ) states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(compName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index e12a1ba97..c45a73231 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the evaluation by each contig in the reference sequence + */ public class Contig extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.addAll(getVariantEvalWalker().getContigNames()); states.add("all"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index ff49c8ba9..539cd21ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; /** * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site @@ -19,21 +20,14 @@ import java.util.ArrayList; * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. */ public class CpG extends VariantStratifier { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("CpG"); states.add("non_CpG"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { boolean isCpG = false; if (ref != null && ref.getBases() != null) { String fwRefBases = new String(ref.getBases()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index cc878e975..3223626c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +/** + * Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe + */ public class Degeneracy extends VariantStratifier { - private ArrayList states; - private HashMap> degeneracies; @Override @@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 0bfecee25..e276adc32 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Required stratification grouping output by each eval ROD + */ public class EvalRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(evalName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 3e3cbc224..aacfae993 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by the FILTER status (PASS, FAIL) of the eval records + */ public class Filter extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("called"); states.add("filtered"); states.add("raw"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("raw"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 0de871fe6..193a65591 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation. + */ public class FunctionalClass extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("silent"); states.add("missense"); states.add("nonsense"); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index 59b991c4d..c0cab4534 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import java.util.List; import java.util.ArrayList; import java.util.Set; +/** + * Stratifies the eval RODs by user-supplied JEXL expressions + * + * See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details + */ public class JexlExpression extends VariantStratifier implements StandardStratification { // needs to know the jexl expressions private Set jexlExpressions; - private ArrayList states; @Override public void initialize() { jexlExpressions = getVariantEvalWalker().getJexlExpressions(); - states = new ArrayList(); states.add("none"); for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { states.add(jexlExpression.name); } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("none"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a3810a4c0..77d98d33b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Stratifies by whether a site in in the list of known RODs (e.g., dbsnp by default) + */ public class Novelty extends VariantStratifier implements StandardStratification { // needs the variant contexts and known names private List> knowns; - final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel")); @Override public void initialize() { + states = new ArrayList(Arrays.asList("all", "known", "novel")); knowns = getVariantEvalWalker().getKnowns(); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (tracker != null && eval != null) { final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval if ( eval.getType() == c.getType() ) { - return new ArrayList(Arrays.asList("all", "known")); + return Arrays.asList("all", "known"); } } } - return new ArrayList(Arrays.asList("all", "novel")); + return Arrays.asList("all", "novel"); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index b714fa291..c697b5b7a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +/** + * Stratifies the eval RODs by each sample in the eval ROD. + * + * This allows the system to analyze each sample separately. Since many evaluations + * only consider non-reference sites, stratifying by sample results in meaningful + * calculations for CompOverlap + */ public class Sample extends VariantStratifier { - // needs the sample names - private ArrayList samples; - @Override public void initialize() { - samples = new ArrayList(); - samples.addAll(getVariantEvalWalker().getSampleNamesForStratification()); + states.addAll(getVariantEvalWalker().getSampleNamesForStratification()); } - public ArrayList getAllStates() { - return samples; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - ArrayList relevantStates = new ArrayList(); - relevantStates.add(sampleName); - - return relevantStates; + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return Arrays.asList(sampleName); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index df6523207..5cae2fb15 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; public abstract class VariantStratifier implements Comparable { private VariantEvalWalker variantEvalWalker; + protected ArrayList states = new ArrayList(); /** * @return a reference to the parent VariantEvalWalker running this stratification @@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable { public abstract void initialize(); - public ArrayList getAllStates() { - return new ArrayList(); - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { return null; } public int compareTo(Object o1) { return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName()); } + + public ArrayList getAllStates() { + return states; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java new file mode 100644 index 000000000..7d25498a5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Stratifies the eval variants by their type (SNP, INDEL, ETC) + */ +public class VariantType extends VariantStratifier { + @Override + public void initialize() { + for ( VariantContext.Type t : VariantContext.Type.values() ) { + states.add(t.toString()); + } + } + + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return eval == null ? Collections.emptyList() : Arrays.asList(eval.getType().toString()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index f31dd9f9f..92e7c6554 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -266,10 +266,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested sample */ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - ArrayList sampleNames = new ArrayList(); - sampleNames.add(sampleName); - - return getSubsetOfVariantContext(vc, sampleNames); + return getSubsetOfVariantContext(vc, Arrays.asList(sampleName)); } /** @@ -371,12 +368,12 @@ public class VariantEvalUtils { * @param stateKeys all the state keys * @return a list of state keys */ - public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { + public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { if (stateStack == null) { - stateStack = new Stack>>(); + stateStack = new Stack>>(); for (VariantStratifier vs : stateMap.keySet()) { - HashMap> oneSetOfStates = new HashMap>(); + HashMap> oneSetOfStates = new HashMap>(); oneSetOfStates.put(vs, stateMap.get(vs)); stateStack.add(oneSetOfStates); @@ -384,10 +381,10 @@ public class VariantEvalUtils { } if (!stateStack.isEmpty()) { - Stack>> newStateStack = new Stack>>(); + Stack>> newStateStack = new Stack>>(); newStateStack.addAll(stateStack); - HashMap> oneSetOfStates = newStateStack.pop(); + HashMap> oneSetOfStates = newStateStack.pop(); VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); for (String state : oneSetOfStates.get(vs)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index bb3cd82a1..35ff66243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker { @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false) public Set sampleExpressions ; - @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) + @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) public Set sampleFiles; /** @@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker { /** * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. */ - @Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) + @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) public Set XLsampleFiles = new HashSet(0); /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index c0f695966..2c7902914 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; @@ -41,7 +40,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Collection; import java.util.HashSet; -import java.util.List; import java.util.Set; @@ -168,14 +166,9 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; if ( tracker.hasValues(dbsnp.dbsnp) ) { - List dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus()); rsIDs = new HashSet(); - for ( Object d : dbsnpList ) { - if (d instanceof DbSNPFeature ) - rsIDs.add(((DbSNPFeature)d).getRsID()); - else if (d instanceof VariantContext ) - rsIDs.add(((VariantContext)d).getID()); - } + for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) ) + rsIDs.add(rsID.getID()); } try { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index be045309a..26c25850a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -63,6 +63,10 @@ public class ReadClipper { if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + // TODO add requires statement/check in the Hardclip function + if ( start > stop ) + stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 66e1afecb..12899e898 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -82,7 +82,7 @@ public class PileupElement { // -------------------------------------------------------------------------- private Integer getReducedReadQualityTagValue() { - return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); + return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); } public boolean isReducedRead() { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 517f9f75d..c55a462f1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord { // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; + /** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */ + private boolean lookedUpReducedReadQuality = false; + private Integer reducedReadQuality; + // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to // individual GATKSAMRecords. @@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord { public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); } - public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); } + public Integer getIntegerAttribute(final String tag) { + if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) { + if ( ! lookedUpReducedReadQuality ) { + lookedUpReducedReadQuality = true; + reducedReadQuality = mRecord.getIntegerAttribute(tag); + } + return reducedReadQuality; + } else { + return mRecord.getIntegerAttribute(tag); + } + } public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 673fe4529..699133e38 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return true if it's monomorphic */ public boolean isMonomorphic() { - return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); + return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples()); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index c0d32a05b..7f4d96add 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest { Object accumulator = countReadWalker.reduceInit(); while (shardStrategy.hasNext()) { + traversalEngine.startTimersIfNecessary(); Shard shard = shardStrategy.next(); if (shard == null) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 3503a2353..6c4393d6a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -261,10 +261,10 @@ public class VariantEvalIntegrationTest extends WalkerTest { return String.format("%s -select '%s' -selectName %s", cmd, select, name); } - @Test + @Test(enabled = false) // no longer supported in the GATK public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952")); executeTestParallel("testTranches",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index e740acf05..95fafac8d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + + " --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + " -T VariantsToVCF" + " -L 1:1-30,000,000" + " -o %s" + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index b0c23e6b1..f97ce4884 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils -import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} +import org.broadinstitute.sting.queue.function.ListWriterFunction class DataProcessingPipeline extends QScript { qscript => @@ -31,19 +31,14 @@ class DataProcessingPipeline extends QScript { var reference: File = _ @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) - var dbSNP: File = _ + var dbSNP: List[File] = List() /**************************************************************************** * Optional Parameters ****************************************************************************/ - -// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false) -// var sortSamJar: File = _ -// - @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) - var indels: File = _ + var indels: List[File] = List() @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ @@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript { } } return sampleTable.toMap - -// println("\n\n*** INPUT FILES ***\n") -// // Creating one file for each sample in the dataset -// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] -// for ((sample, flist) <- sampleTable) { -// -// println(sample + ":") -// for (f <- flist) -// println (f) -// println() -// -// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list") -// sampleBamFiles(sample) = sampleFileName -// //add(writeList(flist, sampleFileName)) -// } -// println("*** INPUT FILES ***\n\n") -// -// return sampleBamFiles.toMap } // Rebuilds the Read Group string to give BWA @@ -321,9 +298,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.out = outIntervals this.mismatchFraction = 0.0 - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" this.jobName = queueLogDir + outIntervals + ".target" @@ -333,9 +310,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.targetIntervals = tIntervals this.out = outBam - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (qscript.indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.consensusDeterminationModel = cleanModelEnum this.compress = 0 this.scatterCount = nContigs @@ -344,7 +321,7 @@ class DataProcessingPipeline extends QScript { } case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { - this.knownSites :+= qscript.dbSNP + this.knownSites ++= qscript.dbSNP this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam this.recal_file = outRecalFile diff --git a/settings/repository/org.broad/tribble-23.jar b/settings/repository/org.broad/tribble-24.jar similarity index 93% rename from settings/repository/org.broad/tribble-23.jar rename to settings/repository/org.broad/tribble-24.jar index 40e2d4c5e..b1c39e60a 100644 Binary files a/settings/repository/org.broad/tribble-23.jar and b/settings/repository/org.broad/tribble-24.jar differ diff --git a/settings/repository/org.broad/tribble-23.xml b/settings/repository/org.broad/tribble-24.xml similarity index 51% rename from settings/repository/org.broad/tribble-23.xml rename to settings/repository/org.broad/tribble-24.xml index 2f6a16a03..9b2b967f8 100644 --- a/settings/repository/org.broad/tribble-23.xml +++ b/settings/repository/org.broad/tribble-24.xml @@ -1,3 +1,3 @@ - +