Merge pull request #334 from broadinstitute/mc_generic_input_for_qualify_missing_intervals

QualifyMissingIntervals: support different formats
This commit is contained in:
Eric Banks 2013-07-25 12:39:26 -07:00
commit 9372c5ef41
2 changed files with 12 additions and 37 deletions

View File

@ -91,6 +91,7 @@ final class Metrics {
double gccontent() {return refs > 0 ? gccontent/refs : 0.0;}
double baseQual() {return reads > 0 ? baseQual/reads : 0.0;}
double mapQual() {return reads > 0 ? mapQual/reads : 0.0;}
double depth() {return refs > 0 ? (double) reads/refs : 0.0;}
/**
* Combines two metrics

View File

@ -61,14 +61,11 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.List;
@ -115,10 +112,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
protected PrintStream out;
@Argument(shortName = "targets", required = true)
public File targetsFile;
public String targetsFile;
@Argument(shortName = "cds", required = false)
public File cdsFile = null;
public String cdsFile = null;
GATKReport simpleReport;
GenomeLocSortedSet target;
@ -129,13 +126,14 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
}
public void initialize() {
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "TP", "CD", "LN");
// if cds file is not provided, just use the targets file (no harm done)
if (cdsFile == null)
cdsFile = targetsFile;
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN");
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
target = new GenomeLocSortedSet(parser);
cds = new GenomeLocSortedSet(parser);
parseFile(targetsFile, target, parser);
if (cdsFile != null)
parseFile(cdsFile, cds, parser);
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile));
}
public Metrics reduceInit() {
@ -183,6 +181,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
metrics.gccontent(),
metrics.baseQual(),
metrics.mapQual(),
metrics.depth(),
getPositionInTarget(interval),
cds.overlaps(interval),
interval.size()
@ -192,31 +191,6 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
out.close();
}
private static GenomeLoc parseInterval(String s, GenomeLocParser parser) {
if (s.isEmpty()) {
return null;
}
String[] first = s.split(":");
if (first.length == 2) {
String[] second = first[1].split("\\-");
return parser.createGenomeLoc(first[0], Integer.decode(second[0]), Integer.decode(second[1]));
} else {
throw new UserException.BadInput("Interval doesn't parse correctly: " + s);
}
}
private void parseFile(File file, GenomeLocSortedSet set, GenomeLocParser parser) {
try {
for (String s : new XReadLines(file) ) {
GenomeLoc interval = parseInterval(s, parser);
if (interval != null)
set.add(interval, true);
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
private int getPositionInTarget(GenomeLoc interval) {
final List<GenomeLoc> hits = target.getOverlapping(interval);
int result = 0;