diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index 1f873ffbd..3ad84c9a5 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -29,17 +29,14 @@ import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureReader; -import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.util.*; /** @@ -73,7 +70,11 @@ public final class IntervalBinding { return stringIntervals; } - public List getIntervals(GenomeAnalysisEngine toolkit) { + public List getIntervals(final GenomeAnalysisEngine toolkit) { + return getIntervals(toolkit.getGenomeLocParser()); + } + + public List getIntervals(final GenomeLocParser genomeLocParser) { List intervals; if ( featureIntervals != null ) { @@ -83,17 +84,17 @@ public final class IntervalBinding { final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec(); if ( codec instanceof ReferenceDependentFeatureCodec ) - ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser()); + ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser); try { FeatureReader reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false); for ( Feature feature : reader.iterator() ) - intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature)); + intervals.add(genomeLocParser.createGenomeLoc(feature)); } catch (Exception e) { throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e); } } else { - intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals); + intervals = IntervalUtils.parseIntervalArguments(genomeLocParser, stringIntervals); } return intervals; diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index d77f266bd..6fa70f437 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -592,7 +592,7 @@ public class GenomeAnalysisEngine { // if include argument isn't given, create new set of all possible intervals GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ? GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : - loadIntervals(argCollection.intervals, argCollection.intervalSetRule)); + loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding)); // if no exclude arguments, can return parseIntervalArguments directly if ( argCollection.excludeIntervals == null ) @@ -619,16 +619,31 @@ public class GenomeAnalysisEngine { * @param rule interval merging rule * @return A sorted, merged list of all intervals specified in this arg list. */ - protected GenomeLocSortedSet loadIntervals( List> argList, IntervalSetRule rule ) { + protected GenomeLocSortedSet loadIntervals( final List> argList, final IntervalSetRule rule ) { + return loadIntervals(argList, rule, 0); + } + + /** + * Loads the intervals relevant to the current execution + * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above + * @param rule interval merging rule + * @param padding how much to pad the intervals + * @return A sorted, merged list of all intervals specified in this arg list. + */ + protected GenomeLocSortedSet loadIntervals( final List> argList, final IntervalSetRule rule, final int padding ) { List allIntervals = new ArrayList(); for ( IntervalBinding intervalBinding : argList ) { - List intervals = intervalBinding.getIntervals(this); + List intervals = intervalBinding.getIntervals(this.getGenomeLocParser()); if ( intervals.isEmpty() ) { logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed."); } + if ( padding > 0 ) { + intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding); + } + allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index bd830a920..13c737a2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -104,6 +104,12 @@ public class GATKArgumentCollection { @Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false) public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL; + /** + * For example, '-L chr1:100' with a padding value of 20 would turn into '-L chr1:80-120'. + */ + @Argument(fullName = "interval_padding", shortName = "ip", doc = "Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument", required = false) + public int intervalPadding = 0; + @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) public File referenceFile = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 7edadfc4b..bec1ea543 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -283,6 +283,41 @@ public class GATKReport { return output; } + /** + * The constructor for a simplified GATK Report. Simplified GATK report are designed for reports that do not need + * the advanced functionality of a full GATK Report. + *

+ * A simple GATK Report consists of: + *

+ * - A single table + * - No primary key ( it is hidden ) + *

+ * Optional: + * - Only untyped columns. As long as the data is an Object, it will be accepted. + * - Default column values being empty strings. + *

+ * Limitations: + *

+ * - A simple GATK report cannot contain multiple tables. + * - It cannot contain typed columns, which prevents arithmetic gathering. + * + * @param tableName The name of your simple GATK report table + * @param columns The names of the columns in your table + * @return a simplified GATK report + */ + public static GATKReport newSimpleReport(final String tableName, final List columns) { + GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report", columns.size()); + + for (String column : columns) { + table.addColumn(column, ""); + } + + GATKReport output = new GATKReport(); + output.addTable(table); + + return output; + } + /** * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information. @@ -303,4 +338,27 @@ public class GATKReport { for ( int i = 0; i < values.length; i++ ) table.set(rowIndex, i, values[i]); } + + /** + * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports + * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information. + * + * @param values the row of data to be added to the table. + * Note: the number of arguments must match the columns in the table. + */ + public void addRowList(final List values) { + if ( tables.size() != 1 ) + throw new ReviewedStingException("Cannot write a row to a complex GATK Report"); + + GATKReportTable table = tables.firstEntry().getValue(); + if ( table.getNumColumns() != values.size() ) + throw new ReviewedStingException("The number of arguments in writeRow() must match the number of columns in the table"); + + final int rowIndex = table.getNumRows(); + int idx = 0; + for ( Object value : values ) { + table.set(rowIndex,idx,value); + idx++; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 833dce932..ef52014f3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -405,15 +405,19 @@ public class DepthOfCoverageWalker extends LocusWalker 0 ) { + for(DoCOutputType.Partition partition: partitionTypes) { + if ( checkType(statsByInterval.get(0).getSecond().getCoverageByAggregationType(partition) ,partition) ) { + printIntervalStats(statsByInterval, + getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.summary), + getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.statistics), + partition); + } else { + throw new ReviewedStingException("Partition type "+partition.toString()+" had no entries. Please check that your .bam header has all appropriate partition types."); + } } + } else { + throw new UserException.CommandLineException("Cannot reduce by interval without interval list provided. Please provide a -L argument."); } onTraversalDone(mergeAll(statsByInterval)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 7a3b85567..158f20b61 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -14,8 +14,9 @@ import java.util.*; /** * Stratifies the eval RODs by the allele count of the alternate allele * - * Looks at the AC value in the INFO field, and uses that value if present. If absent, - * computes the AC from the genotypes themselves. For no AC can be computed, 0 is used. + * Looks first at the MLEAC value in the INFO field, and uses that value if present. + * If not present, it then looks for the AC value in the INFO field. If both are absent, + * it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used. */ public class AlleleCount extends VariantStratifier { @Override @@ -41,8 +42,10 @@ public class AlleleCount extends VariantStratifier { if (eval != null) { int AC = 0; // by default, the site is considered monomorphic - if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) { - AC = eval.getAttributeAsInt("AC", 0); + if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) { + AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0); + } else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) { + AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); } else if ( eval.isVariant() ) { for (Allele allele : eval.getAlternateAlleles()) AC = Math.max(AC, eval.getCalledChrCount(allele)); diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index e33ff6f30..b3f4542af 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -587,6 +587,27 @@ public final class GenomeLocParser { return createGenomeLoc(contigName, contigIndex, start, stop, true); } + /** + * Creates a loc padded in both directions by maxBasePairs size (if possible). + * @param loc The original loc + * @param padding The number of base pairs to pad on either end + * @return The contiguous loc of length up to the original length + 2*padding (depending on the start/end of the contig). + */ + @Requires({"loc != null", "padding > 0"}) + public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) { + if (GenomeLoc.isUnmapped(loc)) + return loc; + final String contigName = loc.getContig(); + final SAMSequenceRecord contig = contigInfo.getSequence(contigName); + final int contigIndex = contig.getSequenceIndex(); + final int contigLength = contig.getSequenceLength(); + + final int start = Math.max(1, loc.getStart() - padding); + final int stop = Math.min(contigLength, loc.getStop() + padding); + + return createGenomeLoc(contigName, contigIndex, start, stop, true); + } + /** * Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size. * @param loc The original loc diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index ea1eaeb51..c96226405 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -648,6 +648,26 @@ public class IntervalUtils { return expanded; } + /** + * Returns a list of intervals between the passed int locs. Does not extend UNMAPPED locs. + * @param parser A genome loc parser for creating the new intervals + * @param locs Original genome locs + * @param basePairs Number of base pairs on each side of loc + * @return The list of intervals between the locs + */ + public static List getIntervalsWithFlanks(final GenomeLocParser parser, final List locs, final int basePairs) { + + if (locs.size() == 0) + return Collections.emptyList(); + + final List expanded = new ArrayList(); + for ( final GenomeLoc loc : locs ) { + expanded.add(parser.createPaddedGenomeLoc(loc, basePairs)); + } + + return sortAndMergeIntervals(parser, expanded, IntervalMergingRule.ALL).toList(); + } + private static LinkedHashMap> splitByContig(List sorted) { LinkedHashMap> splits = new LinkedHashMap>(); GenomeLoc last = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index c1dd8b18b..27ce5f408 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -32,11 +32,12 @@ import java.util.Arrays; public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/"; - private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; - private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; - private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf"; - private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf"; - private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf"; + private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; + private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf"; + private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf"; + private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf"; + private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf"; + private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf"; private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; @@ -457,6 +458,27 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTest("testAlleleCountStrat", spec); } + @Test + public void testAlleleCountStratWithMLE() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsWithMLEVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-L " + fundamentalTestSNPsWithMLEVCF, + "-o %s" + ), + 1, + Arrays.asList("cadd4e32ab82f7e43d0e510dfe5e6dda") + ); + executeTest("testAlleleCountStratWithMLE", spec); + } + @Test public void testMultipleEvalTracksAlleleCountWithMerge() { WalkerTestSpec spec = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 1f2d5b5c5..4e7131c7e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -128,7 +128,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("5bf9663274ceb552f5469f8c1dfc22ed") + Arrays.asList("be38bdc7bd88f5d09cf1a9d55cfecb0b") ); executeTest("testRegenotype--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java index 4d8e096c1..875765ca8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java @@ -207,11 +207,26 @@ public class IntervalIntegrationTest extends WalkerTest { " -R " + hg18Reference + " -o %s" + " -L " + validationDataLocation + "intervalTest.3.vcf", - 1, // just one output file - Arrays.asList(md5)); + 1, // just one output file + Arrays.asList(md5)); executeTest("testComplexVCF", spec); } + @Test(enabled = true) + public void testComplexVCFWithPadding() { + String md5 = "649ee93d50739c656e94ec88a32c7ffe"; + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T CountLoci" + + " --interval_padding 2" + + " -I " + validationDataLocation + "OV-0930.normal.chunk.bam" + + " -R " + hg18Reference + + " -o %s" + + " -L " + validationDataLocation + "intervalTest.3.vcf", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testComplexVCFWithPadding", spec); + } + @Test(enabled = true) public void testMergingWithComplexVCF() { String md5 = "6d7fce9fee471194aa8b5b6e47267f03";