Merge pull request #416 from broadinstitute/mc_quick_fixes_to_cser_pipeline

Add interpretation to QualifyMissingIntervals
This commit is contained in:
Eric Banks 2013-11-05 06:08:13 -08:00
commit 09dfaf1a68
2 changed files with 20 additions and 22 deletions

View File

@ -47,16 +47,15 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing; package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Gather;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.report.GATKReportGatherer;
import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -109,10 +108,12 @@ import java.util.List;
*/ */
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@By(DataSource.REFERENCE) @By(DataSource.REFERENCE)
@PartitionBy(PartitionType.INTERVAL)
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable { public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
/** /**
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing. * A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
*/ */
@Gather(GATKReportGatherer.class)
@Output @Output
protected PrintStream out; protected PrintStream out;
@ -125,11 +126,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
public String targetsFile; public String targetsFile;
/** /**
* List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals * List of baits to distinguish untargeted intervals from those that are targeted but not covered
* that overlap the cds and intervals that don't.
*/ */
@Argument(shortName = "cds", required = false) @Argument(shortName = "baits", required = false)
public String cdsFile = null; public String baitsFile = null;
/** /**
* This value will be used to determine whether or not an interval had too high or too low GC content to be * This value will be used to determine whether or not an interval had too high or too low GC content to be
@ -182,8 +182,8 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
} }
GATKReport simpleReport; GATKReport simpleReport;
GenomeLocSortedSet target; GenomeLocSortedSet targets;
GenomeLocSortedSet cds; GenomeLocSortedSet baits;
public boolean isReduceByInterval() { public boolean isReduceByInterval() {
return true; return true;
@ -191,13 +191,13 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
public void initialize() { public void initialize() {
// if cds file is not provided, just use the targets file (no harm done) // if baits file is not provided, just use the targets file (no harm done)
if (cdsFile == null) if (baitsFile == null)
cdsFile = targetsFile; baitsFile = targetsFile;
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN", "DS"); simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "INTERVAL", "GC", "BQ", "MQ", "DP", "POS_IN_TARGET", "TARGET_SIZE", "BAITED", "MISSING_SIZE", "INTERPRETATION");
final GenomeLocParser parser = getToolkit().getGenomeLocParser(); final GenomeLocParser parser = getToolkit().getGenomeLocParser();
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); targets = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile)); baits = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, baitsFile));
} }
public Metrics reduceInit() { public Metrics reduceInit() {
@ -240,7 +240,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
for (Pair<GenomeLoc, Metrics> r : results) { for (Pair<GenomeLoc, Metrics> r : results) {
final GenomeLoc interval = r.getFirst(); final GenomeLoc interval = r.getFirst();
final Metrics metrics = r.getSecond(); final Metrics metrics = r.getSecond();
final List<GenomeLoc> overlappingIntervals = target.getOverlapping(interval); final List<GenomeLoc> overlappingIntervals = targets.getOverlapping(interval);
simpleReport.addRow( simpleReport.addRow(
interval.toString(), interval.toString(),
@ -250,7 +250,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
metrics.depth(), metrics.depth(),
getPositionInTarget(interval, overlappingIntervals), getPositionInTarget(interval, overlappingIntervals),
getTargetSize(overlappingIntervals), getTargetSize(overlappingIntervals),
cds.overlaps(interval), baits.overlaps(interval),
interval.size(), interval.size(),
interpret(metrics, interval) interpret(metrics, interval)
); );

View File

@ -42,23 +42,21 @@ public class GATKReportGatherer extends Gatherer {
try { try {
o = new PrintStream(output); o = new PrintStream(output);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new UserException("File to be output by CoverageByRG Gather function was not found"); throw new UserException(String.format("File %s to be output by GATKReportGatherer function was not found", output));
} }
GATKReport current = new GATKReport(); GATKReport current = new GATKReport();
boolean isFirst = true; boolean isFirst = true;
for (File input : inputs) { for (File input : inputs) {
// If the table is empty
if (isFirst) { if (isFirst) {
current = new GATKReport(input); current = new GATKReport(input);
isFirst = false; isFirst = false;
} else { } else {
GATKReport toAdd = new GATKReport(input); current.concat(new GATKReport(input));
current.concat(toAdd);
} }
} }
current.print(o); current.print(o);
o.close();
} }
} }