Merge pull request #416 from broadinstitute/mc_quick_fixes_to_cser_pipeline

Add interpretation to QualifyMissingIntervals
This commit is contained in:
Eric Banks 2013-11-05 06:08:13 -08:00
commit 09dfaf1a68
2 changed files with 20 additions and 22 deletions

View File

@ -47,16 +47,15 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Gather;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
import org.broadinstitute.sting.gatk.report.GATKReportGatherer;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -109,10 +108,12 @@ import java.util.List;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
@By(DataSource.REFERENCE)
@PartitionBy(PartitionType.INTERVAL)
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
/**
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
*/
@Gather(GATKReportGatherer.class)
@Output
protected PrintStream out;
@ -125,11 +126,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
public String targetsFile;
/**
* List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals
* that overlap the cds and intervals that don't.
* List of baits to distinguish untargeted intervals from those that are targeted but not covered
*/
@Argument(shortName = "cds", required = false)
public String cdsFile = null;
@Argument(shortName = "baits", required = false)
public String baitsFile = null;
/**
* This value will be used to determine whether or not an interval had too high or too low GC content to be
@ -182,8 +182,8 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
}
GATKReport simpleReport;
GenomeLocSortedSet target;
GenomeLocSortedSet cds;
GenomeLocSortedSet targets;
GenomeLocSortedSet baits;
public boolean isReduceByInterval() {
return true;
@ -191,13 +191,13 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
public void initialize() {
// if cds file is not provided, just use the targets file (no harm done)
if (cdsFile == null)
cdsFile = targetsFile;
if (baitsFile == null)
baitsFile = targetsFile;
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN", "DS");
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "INTERVAL", "GC", "BQ", "MQ", "DP", "POS_IN_TARGET", "TARGET_SIZE", "BAITED", "MISSING_SIZE", "INTERPRETATION");
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile));
targets = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
baits = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, baitsFile));
}
public Metrics reduceInit() {
@ -240,7 +240,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
for (Pair<GenomeLoc, Metrics> r : results) {
final GenomeLoc interval = r.getFirst();
final Metrics metrics = r.getSecond();
final List<GenomeLoc> overlappingIntervals = target.getOverlapping(interval);
final List<GenomeLoc> overlappingIntervals = targets.getOverlapping(interval);
simpleReport.addRow(
interval.toString(),
@ -250,7 +250,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
metrics.depth(),
getPositionInTarget(interval, overlappingIntervals),
getTargetSize(overlappingIntervals),
cds.overlaps(interval),
baits.overlaps(interval),
interval.size(),
interpret(metrics, interval)
);

View File

@ -42,23 +42,21 @@ public class GATKReportGatherer extends Gatherer {
try {
o = new PrintStream(output);
} catch (FileNotFoundException e) {
throw new UserException("File to be output by CoverageByRG Gather function was not found");
throw new UserException(String.format("File %s to be output by GATKReportGatherer function was not found", output));
}
GATKReport current = new GATKReport();
boolean isFirst = true;
for (File input : inputs) {
// If the table is empty
if (isFirst) {
current = new GATKReport(input);
isFirst = false;
} else {
GATKReport toAdd = new GATKReport(input);
current.concat(toAdd);
current.concat(new GATKReport(input));
}
}
current.print(o);
o.close();
}
}