Merge pull request #416 from broadinstitute/mc_quick_fixes_to_cser_pipeline
Add interpretation to QualifyMissingIntervals
This commit is contained in:
commit
09dfaf1a68
|
|
@ -47,16 +47,15 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Gather;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportGatherer;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
|
@ -109,10 +108,12 @@ import java.util.List;
|
|||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||
@By(DataSource.REFERENCE)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
|
||||
/**
|
||||
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
|
||||
*/
|
||||
@Gather(GATKReportGatherer.class)
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
|
|
@ -125,11 +126,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
|||
public String targetsFile;
|
||||
|
||||
/**
|
||||
* List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals
|
||||
* that overlap the cds and intervals that don't.
|
||||
* List of baits to distinguish untargeted intervals from those that are targeted but not covered
|
||||
*/
|
||||
@Argument(shortName = "cds", required = false)
|
||||
public String cdsFile = null;
|
||||
@Argument(shortName = "baits", required = false)
|
||||
public String baitsFile = null;
|
||||
|
||||
/**
|
||||
* This value will be used to determine whether or not an interval had too high or too low GC content to be
|
||||
|
|
@ -182,8 +182,8 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
|||
}
|
||||
|
||||
GATKReport simpleReport;
|
||||
GenomeLocSortedSet target;
|
||||
GenomeLocSortedSet cds;
|
||||
GenomeLocSortedSet targets;
|
||||
GenomeLocSortedSet baits;
|
||||
|
||||
public boolean isReduceByInterval() {
|
||||
return true;
|
||||
|
|
@ -191,13 +191,13 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
|||
|
||||
public void initialize() {
|
||||
// if cds file is not provided, just use the targets file (no harm done)
|
||||
if (cdsFile == null)
|
||||
cdsFile = targetsFile;
|
||||
if (baitsFile == null)
|
||||
baitsFile = targetsFile;
|
||||
|
||||
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN", "DS");
|
||||
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "INTERVAL", "GC", "BQ", "MQ", "DP", "POS_IN_TARGET", "TARGET_SIZE", "BAITED", "MISSING_SIZE", "INTERPRETATION");
|
||||
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
||||
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
|
||||
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile));
|
||||
targets = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
|
||||
baits = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, baitsFile));
|
||||
}
|
||||
|
||||
public Metrics reduceInit() {
|
||||
|
|
@ -240,7 +240,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
|||
for (Pair<GenomeLoc, Metrics> r : results) {
|
||||
final GenomeLoc interval = r.getFirst();
|
||||
final Metrics metrics = r.getSecond();
|
||||
final List<GenomeLoc> overlappingIntervals = target.getOverlapping(interval);
|
||||
final List<GenomeLoc> overlappingIntervals = targets.getOverlapping(interval);
|
||||
|
||||
simpleReport.addRow(
|
||||
interval.toString(),
|
||||
|
|
@ -250,7 +250,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
|||
metrics.depth(),
|
||||
getPositionInTarget(interval, overlappingIntervals),
|
||||
getTargetSize(overlappingIntervals),
|
||||
cds.overlaps(interval),
|
||||
baits.overlaps(interval),
|
||||
interval.size(),
|
||||
interpret(metrics, interval)
|
||||
);
|
||||
|
|
|
|||
|
|
@ -42,23 +42,21 @@ public class GATKReportGatherer extends Gatherer {
|
|||
try {
|
||||
o = new PrintStream(output);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException("File to be output by CoverageByRG Gather function was not found");
|
||||
throw new UserException(String.format("File %s to be output by GATKReportGatherer function was not found", output));
|
||||
}
|
||||
|
||||
GATKReport current = new GATKReport();
|
||||
boolean isFirst = true;
|
||||
for (File input : inputs) {
|
||||
|
||||
// If the table is empty
|
||||
if (isFirst) {
|
||||
current = new GATKReport(input);
|
||||
isFirst = false;
|
||||
} else {
|
||||
GATKReport toAdd = new GATKReport(input);
|
||||
current.concat(toAdd);
|
||||
current.concat(new GATKReport(input));
|
||||
}
|
||||
}
|
||||
|
||||
current.print(o);
|
||||
o.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue