Merge pull request #416 from broadinstitute/mc_quick_fixes_to_cser_pipeline
Add interpretation to QualifyMissingIntervals
This commit is contained in:
commit
09dfaf1a68
|
|
@ -47,16 +47,15 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
|
package org.broadinstitute.sting.gatk.walkers.diagnostics.missing;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Gather;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.walkers.By;
|
import org.broadinstitute.sting.gatk.report.GATKReportGatherer;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
@ -109,10 +108,12 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} )
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
|
@PartitionBy(PartitionType.INTERVAL)
|
||||||
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
|
public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics> implements NanoSchedulable {
|
||||||
/**
|
/**
|
||||||
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
|
* A single GATKReport table with the qualifications on why the intervals passed by the -L argument were missing.
|
||||||
*/
|
*/
|
||||||
|
@Gather(GATKReportGatherer.class)
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
|
@ -125,11 +126,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
public String targetsFile;
|
public String targetsFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of coding sequence intervals (exons) if different from the targets file, to distinguish intervals
|
* List of baits to distinguish untargeted intervals from those that are targeted but not covered
|
||||||
* that overlap the cds and intervals that don't.
|
|
||||||
*/
|
*/
|
||||||
@Argument(shortName = "cds", required = false)
|
@Argument(shortName = "baits", required = false)
|
||||||
public String cdsFile = null;
|
public String baitsFile = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This value will be used to determine whether or not an interval had too high or too low GC content to be
|
* This value will be used to determine whether or not an interval had too high or too low GC content to be
|
||||||
|
|
@ -182,8 +182,8 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
}
|
}
|
||||||
|
|
||||||
GATKReport simpleReport;
|
GATKReport simpleReport;
|
||||||
GenomeLocSortedSet target;
|
GenomeLocSortedSet targets;
|
||||||
GenomeLocSortedSet cds;
|
GenomeLocSortedSet baits;
|
||||||
|
|
||||||
public boolean isReduceByInterval() {
|
public boolean isReduceByInterval() {
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -191,13 +191,13 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
// if cds file is not provided, just use the targets file (no harm done)
|
// if cds file is not provided, just use the targets file (no harm done)
|
||||||
if (cdsFile == null)
|
if (baitsFile == null)
|
||||||
cdsFile = targetsFile;
|
baitsFile = targetsFile;
|
||||||
|
|
||||||
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN", "DS");
|
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "INTERVAL", "GC", "BQ", "MQ", "DP", "POS_IN_TARGET", "TARGET_SIZE", "BAITED", "MISSING_SIZE", "INTERPRETATION");
|
||||||
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
||||||
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
|
targets = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
|
||||||
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile));
|
baits = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, baitsFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Metrics reduceInit() {
|
public Metrics reduceInit() {
|
||||||
|
|
@ -240,7 +240,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
for (Pair<GenomeLoc, Metrics> r : results) {
|
for (Pair<GenomeLoc, Metrics> r : results) {
|
||||||
final GenomeLoc interval = r.getFirst();
|
final GenomeLoc interval = r.getFirst();
|
||||||
final Metrics metrics = r.getSecond();
|
final Metrics metrics = r.getSecond();
|
||||||
final List<GenomeLoc> overlappingIntervals = target.getOverlapping(interval);
|
final List<GenomeLoc> overlappingIntervals = targets.getOverlapping(interval);
|
||||||
|
|
||||||
simpleReport.addRow(
|
simpleReport.addRow(
|
||||||
interval.toString(),
|
interval.toString(),
|
||||||
|
|
@ -250,7 +250,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
metrics.depth(),
|
metrics.depth(),
|
||||||
getPositionInTarget(interval, overlappingIntervals),
|
getPositionInTarget(interval, overlappingIntervals),
|
||||||
getTargetSize(overlappingIntervals),
|
getTargetSize(overlappingIntervals),
|
||||||
cds.overlaps(interval),
|
baits.overlaps(interval),
|
||||||
interval.size(),
|
interval.size(),
|
||||||
interpret(metrics, interval)
|
interpret(metrics, interval)
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -42,23 +42,21 @@ public class GATKReportGatherer extends Gatherer {
|
||||||
try {
|
try {
|
||||||
o = new PrintStream(output);
|
o = new PrintStream(output);
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new UserException("File to be output by CoverageByRG Gather function was not found");
|
throw new UserException(String.format("File %s to be output by GATKReportGatherer function was not found", output));
|
||||||
}
|
}
|
||||||
|
|
||||||
GATKReport current = new GATKReport();
|
GATKReport current = new GATKReport();
|
||||||
boolean isFirst = true;
|
boolean isFirst = true;
|
||||||
for (File input : inputs) {
|
for (File input : inputs) {
|
||||||
|
|
||||||
// If the table is empty
|
|
||||||
if (isFirst) {
|
if (isFirst) {
|
||||||
current = new GATKReport(input);
|
current = new GATKReport(input);
|
||||||
isFirst = false;
|
isFirst = false;
|
||||||
} else {
|
} else {
|
||||||
GATKReport toAdd = new GATKReport(input);
|
current.concat(new GATKReport(input));
|
||||||
current.concat(toAdd);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
current.print(o);
|
current.print(o);
|
||||||
|
o.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue