From 8c076143212c03e3c41d1755680ce1db89827681 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Jul 2013 16:16:28 -0400 Subject: [PATCH] QualifyMissingIntervals: support different formats Problem ------- Qualify Missing Intervals only accepted GATK-formatted interval files for its coding sequence and bait parameters. Solution ------- There is no reason for such a limitation, so I removed all the code that did the parsing and used IntervalUtils to parse the files instead (it now handles any type of interval file that the GATK can handle). PS: Also added an average depth column to the output --- .../walkers/diagnostics/missing/Metrics.java | 1 + .../missing/QualifyMissingIntervals.java | 48 +++++-------------- 2 files changed, 12 insertions(+), 37 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java index 5e3da5f4f..9296cc89b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java @@ -91,6 +91,7 @@ final class Metrics { double gccontent() {return refs > 0 ? gccontent/refs : 0.0;} double baseQual() {return reads > 0 ? baseQual/reads : 0.0;} double mapQual() {return reads > 0 ? mapQual/reads : 0.0;} + double depth() {return refs > 0 ? 
(double) reads/refs : 0.0;} /** * Combines two metrics diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index d0db3ef98..609f11f97 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -61,14 +61,11 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.text.XReadLines; -import java.io.File; -import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.List; @@ -115,10 +112,10 @@ public final class QualifyMissingIntervals extends LocusWalker protected PrintStream out; @Argument(shortName = "targets", required = true) - public File targetsFile; + public String targetsFile; @Argument(shortName = "cds", required = false) - public File cdsFile = null; + public String cdsFile = null; GATKReport simpleReport; GenomeLocSortedSet target; @@ -129,13 +126,14 @@ public final class QualifyMissingIntervals extends LocusWalker } public void initialize() { - simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "TP", "CD", "LN"); + // if cds file is not provided, just use the targets file (no harm done) + if (cdsFile == null) + cdsFile = targetsFile; + + 
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN"); final GenomeLocParser parser = getToolkit().getGenomeLocParser(); - target = new GenomeLocSortedSet(parser); - cds = new GenomeLocSortedSet(parser); - parseFile(targetsFile, target, parser); - if (cdsFile != null) - parseFile(cdsFile, cds, parser); + target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); + cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile)); } public Metrics reduceInit() { @@ -183,6 +181,7 @@ public final class QualifyMissingIntervals extends LocusWalker metrics.gccontent(), metrics.baseQual(), metrics.mapQual(), + metrics.depth(), getPositionInTarget(interval), cds.overlaps(interval), interval.size() @@ -192,31 +191,6 @@ public final class QualifyMissingIntervals extends LocusWalker out.close(); } - private static GenomeLoc parseInterval(String s, GenomeLocParser parser) { - if (s.isEmpty()) { - return null; - } - String[] first = s.split(":"); - if (first.length == 2) { - String[] second = first[1].split("\\-"); - return parser.createGenomeLoc(first[0], Integer.decode(second[0]), Integer.decode(second[1])); - } else { - throw new UserException.BadInput("Interval doesn't parse correctly: " + s); - } - } - - private void parseFile(File file, GenomeLocSortedSet set, GenomeLocParser parser) { - try { - for (String s : new XReadLines(file) ) { - GenomeLoc interval = parseInterval(s, parser); - if (interval != null) - set.add(interval, true); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - } - private int getPositionInTarget(GenomeLoc interval) { final List hits = target.getOverlapping(interval); int result = 0;