diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R index 18d33054e..a24d269c9 100644 --- a/public/R/queueJobReport.R +++ b/public/R/queueJobReport.R @@ -12,7 +12,9 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" + #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA } @@ -113,11 +115,22 @@ plotGroup <- function(groupTable) { textplot(as.data.frame(sum), show.rownames=F) title(paste("Job summary for", name, "itemizing each iteration"), cex=3) + # histogram of job times by groupAnnotations + if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { + # todo -- how do we group by annotations? + p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram() + p <- p + xlab("runtime in seconds") + ylab("No. of jobs") + p <- p + opts(title=paste("Job runtime histogram for", name)) + print(p) + } + # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + if ( dim(sub)[1] > 1 ) { + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + } } # print out some useful basic information @@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T) plotJobsGantt(gatkReportData, F) plotProgressByTime(gatkReportData) for ( group in gatkReportData ) { - plotGroup(group) + plotGroup(group) } if ( ! is.na(outputPDF) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index bf490e28c..7bf518fd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; -import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.dbsnp.OldDbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -93,27 +93,27 @@ public class VariantContextAdaptors { // -------------------------------------------------------------------------------------------------------------- private static class DBSnpAdaptor implements VCAdaptor { - private static boolean isSNP(DbSNPFeature feature) { + private static boolean isSNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact"); } - private static boolean isMNP(DbSNPFeature feature) { + private static boolean isMNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range"); } - private static boolean isInsertion(DbSNPFeature feature) { + private static boolean isInsertion(OldDbSNPFeature feature) { return feature.getVariantType().contains("insertion"); } - private static boolean isDeletion(DbSNPFeature feature) { + private static boolean isDeletion(OldDbSNPFeature feature) { return feature.getVariantType().contains("deletion"); } - private static boolean isIndel(DbSNPFeature feature) { + private static boolean isIndel(OldDbSNPFeature feature) { return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature); } - public static boolean isComplexIndel(DbSNPFeature feature) { + public static boolean isComplexIndel(OldDbSNPFeature feature) { return feature.getVariantType().contains("in-del"); } @@ -125,7 +125,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlternateAlleleList(DbSNPFeature feature) { + public static List getAlternateAlleleList(OldDbSNPFeature feature) { List ret = new ArrayList(); for (String allele : getAlleleList(feature)) if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele); @@ -139,7 +139,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlleleList(DbSNPFeature feature) { + public static List getAlleleList(OldDbSNPFeature feature) { List alleleList = new ArrayList(); // add ref first if ( feature.getStrand() == Strand.POSITIVE ) @@ -156,14 +156,14 @@ public class VariantContextAdaptors { /** * Converts non-VCF formatted dbSNP records to VariantContext. - * @return DbSNPFeature. + * @return OldDbSNPFeature. */ @Override - public Class getAdaptableFeatureType() { return DbSNPFeature.class; } + public Class getAdaptableFeatureType() { return OldDbSNPFeature.class; } @Override public VariantContext convert(String name, Object input, ReferenceContext ref) { - DbSNPFeature dbsnp = (DbSNPFeature)input; + OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) return null; Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index a97f3211c..7aa112961 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -26,8 +26,11 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureSource; +import org.broad.tribble.iterators.CloseableTribbleIterator; +import org.broad.tribble.source.PerformanceLoggingFeatureSource; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; @@ -47,7 +50,6 @@ import java.io.IOException; */ public class RMDTrack { private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); - private final static boolean DEBUG = false; // the basics of a track: private final Class type; // our type @@ -113,8 +115,10 @@ public class RMDTrack { } public CloseableIterator query(GenomeLoc interval) throws IOException { - if ( DEBUG ) logger.debug("Issuing query for %s: " + interval); - return new FeatureToGATKFeatureIterator(genomeLocParser, reader.query(interval.getContig(),interval.getStart(),interval.getStop()), this.getName()); + CloseableTribbleIterator iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop()); + if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE ) + logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog()); + return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName()); } public void close() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index d352894e8..06d05912a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -27,20 +27,21 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; -import org.broad.tribble.*; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.FeatureSource; +import org.broad.tribble.Tribble; +import org.broad.tribble.TribbleException; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.source.BasicFeatureSource; +import org.broad.tribble.source.PerformanceLoggingFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -51,7 +52,10 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.util.*; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; @@ -70,6 +74,7 @@ public class RMDTrackBuilder { // extends PluginManager { * our log, which we use to capture anything from this class */ private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); + public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false; // a constant we use for marking sequence dictionary entries in the Tribble index property list public static final String SequenceDictionaryPropertyPredicate = "DICT:"; @@ -214,7 +219,10 @@ public class RMDTrackBuilder { // extends PluginManager { sequenceDictionary = getSequenceDictionaryFromProperties(index); } - featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); + if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE ) + featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); + else + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); } catch (TribbleException e) { throw new UserException(e.getMessage()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 7fe16c9df..3a18fe610 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -105,10 +105,19 @@ public class DepthOfCoverageWalker extends LocusWalker out; + /** + * Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin. + */ @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) int start = 1; + /** + * Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin. + */ @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) int stop = 500; + /** + * Sets the number of bins for granular binning + */ @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) int nBins = 499; @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) @@ -119,28 +128,59 @@ public class DepthOfCoverageWalker extends LocusWalker partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); + /** + * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. + */ @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) boolean ignoreDeletionSites = false; + + /** + * Path to the RefSeq file for use in aggregating coverage statistics over genes + */ @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) File refSeqGeneList = null; + /** + * The format of the output file + */ @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) String outputFormat = "rtable"; + /** + * A coverage threshold for summarizing (e.g. % bases >= CT for each sample) + */ @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) int[] coverageThresholds = {15}; @@ -963,4 +1003,4 @@ class CoveragePartitioner { public Map> getIdentifiersByType() { return identifiersByType; } -} \ No newline at end of file +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java new file mode 100644 index 000000000..0f1cea2e1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadLengthDistribution.java @@ -0,0 +1,101 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + +import java.io.PrintStream; +import java.util.List; + +/** + * Outputs the read lengths of all the reads in a file. + * + *

+ * Generates a table with the read lengths categorized per sample. If the file has no sample information + * (no read groups) it considers all reads to come from the same sample. + *

+ * + * + *

Input

+ *

+ * A BAM file. + *

+ * + *

Output

+ *

+ * A human/R readable table of tab separated values with one column per sample and one row per read. + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ReadLengthDistribution
+ *      -I example.bam
+ *      -R reference.fasta
+ *      -o example.tbl
+ *  
+ * + * @author Kiran Garimela + */ + + + +public class ReadLengthDistribution extends ReadWalker { + @Output + public PrintStream out; + + private GATKReport report; + + public void initialize() { + report = new GATKReport(); + report.addTable("ReadLengthDistribution", "Table of read length distributions"); + GATKReportTable table = report.getTable("ReadLengthDistribution"); + + table.addPrimaryKey("readLength"); + + List readGroups = getToolkit().getSAMFileHeader().getReadGroups(); + if (readGroups.isEmpty()) + table.addColumn("SINGLE_SAMPLE", 0); + + else + for (SAMReadGroupRecord rg : readGroups) + table.addColumn(rg.getSample(), 0); + + } + + public boolean filter(ReferenceContext ref, SAMRecord read) { + return ( !read.getReadPairedFlag() || read.getReadPairedFlag() && read.getFirstOfPairFlag()); + } + + @Override + public Integer map(ReferenceContext referenceContext, SAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) { + GATKReportTable table = report.getTable("ReadLengthDistribution"); + + int length = Math.abs(samRecord.getReadLength()); + String sample = samRecord.getReadGroup().getSample(); + + table.increment(length, sample); + + return null; + } + + @Override + public Integer reduceInit() { + return null; + } + + @Override + public Integer reduce(Integer integer, Integer integer1) { + return null; + } + + public void onTraversalDone(Integer sum) { + report.print(out); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index ae419a5c4..7b8045581 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -52,6 +52,11 @@ public class UnifiedArgumentCollection { @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY; + /** + * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily + * distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it + * effectively acts as a cap on the base qualities. + */ @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 546bbe1a6..e5ad3106d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -77,6 +77,11 @@ import java.util.*; * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. + * + * If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you, + * please email asivache at broadinstitute dot org and he will gladly explain everything in more detail. + * + * */ @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index b616a0ebe..f416e94a0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -253,6 +253,13 @@ public class GenotypeAndValidateWalker extends RodWalker= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; @@ -371,19 +383,26 @@ public class GenotypeAndValidateWalker extends RodWalker { + /** + * A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe) + */ @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+ "intervals surrounding the probe sites amplicons should be designed for", required=true) RodBinding probeIntervals; - + /** + * A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq. + */ @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true) RodBinding validateAlleles; - + /** + * A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site. + */ @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true) RodBinding maskAlleles; - @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; + /** + * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. + * This changes the size of the k-mer used for alignment. + */ @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) int virtualPrimerSize = 20; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 613a31ed3..fe4729bdc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -122,9 +122,6 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) protected Boolean NO_STANDARD_STRATIFICATIONS = false; - @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) - protected Set typesToUse = null; - /** * See the -list argument to view available modules. */ @@ -317,9 +314,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // find the comp final VariantContext comp = findMatchingComp(eval, compSet); - HashMap> stateMap = new HashMap>(); + HashMap> stateMap = new HashMap>(); for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); + List states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); stateMap.put(vs, states); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 5cdea4e00..3cc22cc52 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; +/** + * Stratifies the eval RODs by the allele count of the alternate allele + * + * Looks at the AC value in the INFO field, and uses that value if present. If absent, + * computes the AC from the genotypes themselves. For no AC can be computed, 0 is used. + */ public class AlleleCount extends VariantStratifier { - // needs to know the variant context - private ArrayList states = new ArrayList(); - @Override public void initialize() { List> evals = getVariantEvalWalker().getEvals(); @@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier { getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(1); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 96d9f30ec..3d2dda651 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the eval RODs by the allele frequency of the alternate allele + * + * Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires + * that AF be present in every ROD, otherwise this stratification throws an exception + */ public class AlleleFrequency extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9f4123589..1f31ebfa7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; + +/** + * Required stratification grouping output by each comp ROD + */ public class CompRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); for ( RodBinding rod : getVariantEvalWalker().getComps() ) states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(compName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index e12a1ba97..c45a73231 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the evaluation by each contig in the reference sequence + */ public class Contig extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.addAll(getVariantEvalWalker().getContigNames()); states.add("all"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index ff49c8ba9..539cd21ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; /** * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site @@ -19,21 +20,14 @@ import java.util.ArrayList; * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. */ public class CpG extends VariantStratifier { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("CpG"); states.add("non_CpG"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { boolean isCpG = false; if (ref != null && ref.getBases() != null) { String fwRefBases = new String(ref.getBases()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index cc878e975..3223626c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +/** + * Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe + */ public class Degeneracy extends VariantStratifier { - private ArrayList states; - private HashMap> degeneracies; @Override @@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 0bfecee25..e276adc32 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Required stratification grouping output by each eval ROD + */ public class EvalRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(evalName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 3e3cbc224..aacfae993 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by the FILTER status (PASS, FAIL) of the eval records + */ public class Filter extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("called"); states.add("filtered"); states.add("raw"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("raw"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 0de871fe6..193a65591 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation. + */ public class FunctionalClass extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("silent"); states.add("missense"); states.add("nonsense"); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index 59b991c4d..c0cab4534 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import java.util.List; import java.util.ArrayList; import java.util.Set; +/** + * Stratifies the eval RODs by user-supplied JEXL expressions + * + * See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details + */ public class JexlExpression extends VariantStratifier implements StandardStratification { // needs to know the jexl expressions private Set jexlExpressions; - private ArrayList states; @Override public void initialize() { jexlExpressions = getVariantEvalWalker().getJexlExpressions(); - states = new ArrayList(); states.add("none"); for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { states.add(jexlExpression.name); } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("none"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a3810a4c0..77d98d33b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Stratifies by whether a site in in the list of known RODs (e.g., dbsnp by default) + */ public class Novelty extends VariantStratifier implements StandardStratification { // needs the variant contexts and known names private List> knowns; - final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel")); @Override public void initialize() { + states = new ArrayList(Arrays.asList("all", "known", "novel")); knowns = getVariantEvalWalker().getKnowns(); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (tracker != null && eval != null) { final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval if ( eval.getType() == c.getType() ) { - return new ArrayList(Arrays.asList("all", "known")); + return Arrays.asList("all", "known"); } } } - return new ArrayList(Arrays.asList("all", "novel")); + return Arrays.asList("all", "novel"); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index b714fa291..c697b5b7a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +/** + * Stratifies the eval RODs by each sample in the eval ROD. + * + * This allows the system to analyze each sample separately. Since many evaluations + * only consider non-reference sites, stratifying by sample results in meaningful + * calculations for CompOverlap + */ public class Sample extends VariantStratifier { - // needs the sample names - private ArrayList samples; - @Override public void initialize() { - samples = new ArrayList(); - samples.addAll(getVariantEvalWalker().getSampleNamesForStratification()); + states.addAll(getVariantEvalWalker().getSampleNamesForStratification()); } - public ArrayList getAllStates() { - return samples; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - ArrayList relevantStates = new ArrayList(); - relevantStates.add(sampleName); - - return relevantStates; + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return Arrays.asList(sampleName); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index df6523207..5cae2fb15 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; public abstract class VariantStratifier implements Comparable { private VariantEvalWalker variantEvalWalker; + protected ArrayList states = new ArrayList(); /** * @return a reference to the parent VariantEvalWalker running this stratification @@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable { public abstract void initialize(); - public ArrayList getAllStates() { - return new ArrayList(); - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { return null; } public int compareTo(Object o1) { return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName()); } + + public ArrayList getAllStates() { + return states; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java new file mode 100644 index 000000000..7d25498a5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Stratifies the eval variants by their type (SNP, INDEL, ETC) + */ +public class VariantType extends VariantStratifier { + @Override + public void initialize() { + for ( VariantContext.Type t : VariantContext.Type.values() ) { + states.add(t.toString()); + } + } + + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return eval == null ? Collections.emptyList() : Arrays.asList(eval.getType().toString()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index f31dd9f9f..3cc039141 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -266,10 +266,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested sample */ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - ArrayList sampleNames = new ArrayList(); - sampleNames.add(sampleName); - - return getSubsetOfVariantContext(vc, sampleNames); + return getSubsetOfVariantContext(vc, Arrays.asList(sampleName)); } /** @@ -280,7 +277,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { - VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles()); + VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); HashMap newAts = new HashMap(vcsub.getAttributes()); @@ -371,12 +368,12 @@ public class VariantEvalUtils { * @param stateKeys all the state keys * @return a list of state keys */ - public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { + public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { if (stateStack == null) { - stateStack = new Stack>>(); + stateStack = new Stack>>(); for (VariantStratifier vs : stateMap.keySet()) { - HashMap> oneSetOfStates = new HashMap>(); + HashMap> oneSetOfStates = new HashMap>(); oneSetOfStates.put(vs, stateMap.get(vs)); stateStack.add(oneSetOfStates); @@ -384,10 +381,10 @@ public class VariantEvalUtils { } if (!stateStack.isEmpty()) { - Stack>> newStateStack = new Stack>>(); + Stack>> newStateStack = new Stack>>(); newStateStack.addAll(stateStack); - HashMap> oneSetOfStates = newStateStack.pop(); + HashMap> oneSetOfStates = newStateStack.pop(); VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); for (String state : oneSetOfStates.get(vs)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index bb3cd82a1..35ff66243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker { @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false) public Set sampleExpressions ; - @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) + @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) public Set sampleFiles; /** @@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker { /** * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. */ - @Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) + @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) public Set XLsampleFiles = new HashSet(0); /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index c0f695966..2c7902914 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; @@ -41,7 +40,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Collection; import java.util.HashSet; -import java.util.List; import java.util.Set; @@ -168,14 +166,9 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; if ( tracker.hasValues(dbsnp.dbsnp) ) { - List dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus()); rsIDs = new HashSet(); - for ( Object d : dbsnpList ) { - if (d instanceof DbSNPFeature ) - rsIDs.add(((DbSNPFeature)d).getRsID()); - else if (d instanceof VariantContext ) - rsIDs.add(((VariantContext)d).getID()); - } + for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) ) + rsIDs.add(rsID.getID()); } try { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index faa3cf34e..bc200372f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -252,7 +252,8 @@ public class ClippingOp { if (start == 0 && stop == read.getReadLength() -1) return new SAMRecord(read.getHeader()); - CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop); + // If the read is unmapped there is no Cigar string and neither should we create a new cigar string + CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop); // the cigar may force a shift left or right (or both) in case we are left with insertions // starting or ending the read after applying the hard clip on start/stop. diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 66e1afecb..12899e898 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -82,7 +82,7 @@ public class PileupElement { // -------------------------------------------------------------------------- private Integer getReducedReadQualityTagValue() { - return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); + return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); } public boolean isReducedRead() { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 517f9f75d..c55a462f1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord { // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; + /** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */ + private boolean lookedUpReducedReadQuality = false; + private Integer reducedReadQuality; + // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to // individual GATKSAMRecords. @@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord { public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); } - public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); } + public Integer getIntegerAttribute(final String tag) { + if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) { + if ( ! lookedUpReducedReadQuality ) { + lookedUpReducedReadQuality = true; + reducedReadQuality = mRecord.getIntegerAttribute(tag); + } + return reducedReadQuality; + } else { + return mRecord.getIntegerAttribute(tag); + } + } public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index e740acf05..95fafac8d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + + " --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + " -T VariantsToVCF" + " -L 1:1-30,000,000" + " -o %s" + diff --git a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java index 74cd8d8fe..061dc99fa 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java @@ -31,8 +31,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { private static final int STEP_SIZE = 1; //private static final List QUERY_SIZES = Arrays.asList(1); - private static final List QUERY_SIZES = Arrays.asList(1, 10, 100, 1000); - private static final List CACHE_SIZES = Arrays.asList(-1, 10, 1000); + private static final List QUERY_SIZES = Arrays.asList(1, 10, 100); + private static final List CACHE_SIZES = Arrays.asList(-1, 1000); @DataProvider(name = "fastas") public Object[][] createData1() { diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 2417e5620..2a135496d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils -import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} +import org.broadinstitute.sting.queue.function.ListWriterFunction class DataProcessingPipeline extends QScript { qscript => @@ -31,7 +31,7 @@ class DataProcessingPipeline extends QScript { var reference: File = _ @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) - var dbSNP: File = _ + var dbSNP: List[File] = List() /**************************************************************************** * Optional Parameters @@ -43,7 +43,7 @@ class DataProcessingPipeline extends QScript { // @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) - var indels: File = _ + var indels: List[File] = List() @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ @@ -159,7 +159,7 @@ class DataProcessingPipeline extends QScript { for (rg <- readGroups) { val intermediateInBam: File = if (index == readGroups.length) { inBam } else { swapExt(outBam, ".bam", index+1 + "-rg.bam") } val intermediateOutBam: File = if (index > 1) {swapExt(outBam, ".bam", index + "-rg.bam") } else { outBam} - val readGroup = new ReadGroup(rg.getReadGroupId, rg.getPlatform, rg.getLibrary, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription) + val readGroup = new ReadGroup(rg.getReadGroupId, rg.getLibrary, rg.getPlatform, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription) add(addReadGroup(intermediateInBam, intermediateOutBam, readGroup)) index = index - 1 } @@ -321,9 +321,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.out = outIntervals this.mismatchFraction = 0.0 - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" this.jobName = queueLogDir + outIntervals + ".target" @@ -333,9 +333,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.targetIntervals = tIntervals this.out = outBam - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (qscript.indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.consensusDeterminationModel = cleanModelEnum this.compress = 0 this.scatterCount = nContigs @@ -344,7 +344,7 @@ class DataProcessingPipeline extends QScript { } case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { - this.knownSites :+= qscript.dbSNP + this.knownSites ++= qscript.dbSNP this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam this.recal_file = outRecalFile diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 3c9a3fbcb..80bfe03d1 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -1,6 +1,5 @@ package org.broadinstitute.sting.queue.qscripts -import org.broadinstitute.sting.commandline.Hidden import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.gatk.phonehome.GATKRunReport @@ -70,7 +69,8 @@ class MethodsDevelopmentCallingPipeline extends QScript { val goldStandardClusterFile = new File(goldStandardName + ".clusters") } - val hg19 = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + val b37_decoy = new File("/humgen/1kg/reference/human_g1k_v37_decoy.fasta") + val hg19 = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") val hg18 = new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") val b36 = new File("/humgen/1kg/reference/human_b36_both.fasta") val b37 = new File("/humgen/1kg/reference/human_g1k_v37.fasta") @@ -124,6 +124,14 @@ class MethodsDevelopmentCallingPipeline extends QScript { new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"), new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3), + "WExTrioDecoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"), + new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3), + "WGSTrioDecoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"), + new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3), "FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"), new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index 2b4a081d4..e17342442 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -1,31 +1,14 @@ - + - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - diff --git a/settings/repository/org.broad/tribble-21.jar b/settings/repository/org.broad/tribble-24.jar similarity index 80% rename from settings/repository/org.broad/tribble-21.jar rename to settings/repository/org.broad/tribble-24.jar index 2cb96f93d..b1c39e60a 100644 Binary files a/settings/repository/org.broad/tribble-21.jar and b/settings/repository/org.broad/tribble-24.jar differ diff --git a/settings/repository/org.broad/tribble-21.xml b/settings/repository/org.broad/tribble-24.xml similarity index 51% rename from settings/repository/org.broad/tribble-21.xml rename to settings/repository/org.broad/tribble-24.xml index cd93af1b0..9b2b967f8 100644 --- a/settings/repository/org.broad/tribble-21.xml +++ b/settings/repository/org.broad/tribble-24.xml @@ -1,3 +1,3 @@ - +