Merge pull request #334 from broadinstitute/mc_generic_input_for_qualify_missing_intervals
QualifyMissingIntervals: support different formats
This commit is contained in:
commit
9372c5ef41
|
|
@ -91,6 +91,7 @@ final class Metrics {
|
||||||
double gccontent() {return refs > 0 ? gccontent/refs : 0.0;}
|
double gccontent() {return refs > 0 ? gccontent/refs : 0.0;}
|
||||||
double baseQual() {return reads > 0 ? baseQual/reads : 0.0;}
|
double baseQual() {return reads > 0 ? baseQual/reads : 0.0;}
|
||||||
double mapQual() {return reads > 0 ? mapQual/reads : 0.0;}
|
double mapQual() {return reads > 0 ? mapQual/reads : 0.0;}
|
||||||
|
double depth() {return refs > 0 ? (double) reads/refs : 0.0;}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines two metrics
|
* Combines two metrics
|
||||||
|
|
|
||||||
|
|
@ -61,14 +61,11 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -115,10 +112,10 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
@Argument(shortName = "targets", required = true)
|
@Argument(shortName = "targets", required = true)
|
||||||
public File targetsFile;
|
public String targetsFile;
|
||||||
|
|
||||||
@Argument(shortName = "cds", required = false)
|
@Argument(shortName = "cds", required = false)
|
||||||
public File cdsFile = null;
|
public String cdsFile = null;
|
||||||
|
|
||||||
GATKReport simpleReport;
|
GATKReport simpleReport;
|
||||||
GenomeLocSortedSet target;
|
GenomeLocSortedSet target;
|
||||||
|
|
@ -129,13 +126,14 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "TP", "CD", "LN");
|
// if cds file is not provided, just use the targets file (no harm done)
|
||||||
|
if (cdsFile == null)
|
||||||
|
cdsFile = targetsFile;
|
||||||
|
|
||||||
|
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN");
|
||||||
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
final GenomeLocParser parser = getToolkit().getGenomeLocParser();
|
||||||
target = new GenomeLocSortedSet(parser);
|
target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile));
|
||||||
cds = new GenomeLocSortedSet(parser);
|
cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile));
|
||||||
parseFile(targetsFile, target, parser);
|
|
||||||
if (cdsFile != null)
|
|
||||||
parseFile(cdsFile, cds, parser);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Metrics reduceInit() {
|
public Metrics reduceInit() {
|
||||||
|
|
@ -183,6 +181,7 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
metrics.gccontent(),
|
metrics.gccontent(),
|
||||||
metrics.baseQual(),
|
metrics.baseQual(),
|
||||||
metrics.mapQual(),
|
metrics.mapQual(),
|
||||||
|
metrics.depth(),
|
||||||
getPositionInTarget(interval),
|
getPositionInTarget(interval),
|
||||||
cds.overlaps(interval),
|
cds.overlaps(interval),
|
||||||
interval.size()
|
interval.size()
|
||||||
|
|
@ -192,31 +191,6 @@ public final class QualifyMissingIntervals extends LocusWalker<Metrics, Metrics>
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static GenomeLoc parseInterval(String s, GenomeLocParser parser) {
|
|
||||||
if (s.isEmpty()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String[] first = s.split(":");
|
|
||||||
if (first.length == 2) {
|
|
||||||
String[] second = first[1].split("\\-");
|
|
||||||
return parser.createGenomeLoc(first[0], Integer.decode(second[0]), Integer.decode(second[1]));
|
|
||||||
} else {
|
|
||||||
throw new UserException.BadInput("Interval doesn't parse correctly: " + s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void parseFile(File file, GenomeLocSortedSet set, GenomeLocParser parser) {
|
|
||||||
try {
|
|
||||||
for (String s : new XReadLines(file) ) {
|
|
||||||
GenomeLoc interval = parseInterval(s, parser);
|
|
||||||
if (interval != null)
|
|
||||||
set.add(interval, true);
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int getPositionInTarget(GenomeLoc interval) {
|
private int getPositionInTarget(GenomeLoc interval) {
|
||||||
final List<GenomeLoc> hits = target.getOverlapping(interval);
|
final List<GenomeLoc> hits = target.getOverlapping(interval);
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue