From 5d6421494b201d903881590a17211a028e42c704 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 9 Oct 2013 14:38:15 -0400 Subject: [PATCH 1/2] Fix mismatching number of columns in report Quick fix for the missing column header in the QualifyMissingIntervals report. Also adds a QScript for the tool and a few minor updates to the GATKReportGatherer. --- .../diagnostics/missing/QualifyMissingIntervals.java | 11 ++++++----- .../sting/gatk/report/GATKReportGatherer.java | 8 +++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index 014ed6dcb..52a92d9ff 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -47,16 +47,15 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.missing; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Gather; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.NanoSchedulable; +import org.broadinstitute.sting.gatk.report.GATKReportGatherer; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import 
org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; @@ -109,10 +108,12 @@ import java.util.List; */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @By(DataSource.REFERENCE) +@PartitionBy(PartitionType.INTERVAL) public final class QualifyMissingIntervals extends LocusWalker implements NanoSchedulable { /** * A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing. */ + @Gather(GATKReportGatherer.class) @Output protected PrintStream out; @@ -194,7 +195,7 @@ public final class QualifyMissingIntervals extends LocusWalker if (cdsFile == null) cdsFile = targetsFile; - simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN", "DS"); + simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "TS", "CD", "LN", "DS"); final GenomeLocParser parser = getToolkit().getGenomeLocParser(); target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java index e9ccebf34..5e7c3ec86 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java @@ -42,23 +42,21 @@ public class GATKReportGatherer extends Gatherer { try { o = new PrintStream(output); } catch (FileNotFoundException e) { - throw new UserException("File to be output by CoverageByRG Gather function was not found"); + throw new UserException(String.format("File %s to be output by GATKReportGatherer function was not found", output)); } GATKReport current = new 
GATKReport(); boolean isFirst = true; for (File input : inputs) { - - // If the table is empty if (isFirst) { current = new GATKReport(input); isFirst = false; } else { - GATKReport toAdd = new GATKReport(input); - current.concat(toAdd); + current.concat(new GATKReport(input)); } } current.print(o); + o.close(); } } From 5ed47988b83de0c17cc9b1065522a4aa14f8818b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 24 Oct 2013 17:13:25 -0400 Subject: [PATCH 2/2] Changed the parameter names from cds to baits Making the usage more clear since the parameter is being used over and over to define baited regions. Updated the headers accordingly and made it more readable. --- .../missing/QualifyMissingIntervals.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index 52a92d9ff..54fc6e97e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -126,11 +126,10 @@ public final class QualifyMissingIntervals extends LocusWalker public String targetsFile; /** - * List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals - * that overlap the cds and intervals that don't. 
+ * List of baits to distinguish untargeted intervals from those that are targeted but not covered */ - @Argument(shortName = "cds", required = false) - public String cdsFile = null; + @Argument(shortName = "baits", required = false) + public String baitsFile = null; /** * This value will be used to determine whether or not an interval had too high or too low GC content to be @@ -183,8 +182,8 @@ public final class QualifyMissingIntervals extends LocusWalker } GATKReport simpleReport; - GenomeLocSortedSet target; - GenomeLocSortedSet cds; + GenomeLocSortedSet targets; + GenomeLocSortedSet baits; public boolean isReduceByInterval() { return true; @@ -192,13 +191,13 @@ public final class QualifyMissingIntervals extends LocusWalker public void initialize() { // if cds file is not provided, just use the targets file (no harm done) - if (cdsFile == null) - cdsFile = targetsFile; + if (baitsFile == null) + baitsFile = targetsFile; - simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "TS", "CD", "LN", "DS"); + simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "INTERVAL", "GC", "BQ", "MQ", "DP", "POS_IN_TARGET", "TARGET_SIZE", "BAITED", "MISSING_SIZE", "INTERPRETATION"); final GenomeLocParser parser = getToolkit().getGenomeLocParser(); - target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); - cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile)); + targets = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); + baits = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, baitsFile)); } public Metrics reduceInit() { @@ -241,7 +240,7 @@ public final class QualifyMissingIntervals extends LocusWalker for (Pair r : results) { final GenomeLoc interval = r.getFirst(); final Metrics metrics = r.getSecond(); - final List overlappingIntervals = target.getOverlapping(interval); + final 
List overlappingIntervals = targets.getOverlapping(interval); simpleReport.addRow( interval.toString(), @@ -251,7 +250,7 @@ public final class QualifyMissingIntervals extends LocusWalker metrics.depth(), getPositionInTarget(interval, overlappingIntervals), getTargetSize(overlappingIntervals), - cds.overlaps(interval), + baits.overlaps(interval), interval.size(), interpret(metrics, interval) );