Minor performance improvements to VariantEval -- now all of the CPU time is spent dealing with the ROD system...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1772 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4554ca1b28
commit
8dd0924b37
|
|
@ -63,6 +63,7 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
|
|||
/**
 * Gives access to the records held by this list.
 *
 * @return the backing {@code records} list itself (no copy is made here)
 */
public List<ROD> getRecords() {
    return this.records;
}
|
||||
/**
 * Iterates over the held records by delegating to the backing {@code records} collection.
 *
 * @return the iterator produced by {@code records.iterator()}
 */
public Iterator<ROD> iterator() {
    return this.records.iterator();
}
|
||||
/**
 * Empties this list by clearing the backing {@code records} collection.
 */
public void clear() {
    this.records.clear();
}
|
||||
/**
 * Reports whether any records are currently held.
 *
 * @return the result of {@code records.isEmpty()}
 */
public boolean isEmpty() {
    return this.records.isEmpty();
}
|
||||
public void add(ROD record) {
|
||||
if ( record != null ) {
|
||||
if ( ! name.equals(record.getName() ) )
|
||||
|
|
|
|||
|
|
@ -142,6 +142,24 @@ public class RefMetaDataTracker {
|
|||
return bound;
|
||||
}
|
||||
|
||||
public int getNBoundRodTracks() {
|
||||
return getNBoundRodTracks(null);
|
||||
}
|
||||
|
||||
public int getNBoundRodTracks(final String excludeIn ) {
|
||||
final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
|
||||
|
||||
int n = 0;
|
||||
for ( RODRecordList<ReferenceOrderedDatum> value : map.values() ) {
|
||||
if ( value != null && ! value.isEmpty() ) {
|
||||
if ( exclude == null || ! value.getName().equals(exclude) )
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
public Collection<ReferenceOrderedDatum> getBoundRodRecords() {
|
||||
LinkedList<ReferenceOrderedDatum> bound = new LinkedList<ReferenceOrderedDatum>();
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import java.util.List;
|
|||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
long all_bases = 0;
|
||||
//long all_bases = 0;
|
||||
long all_calls = 0;
|
||||
final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
|
||||
long[] discoverable_bases = new long[thresholds.length];
|
||||
|
|
@ -30,7 +30,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
|||
}
|
||||
|
||||
public long nSites() {
|
||||
return all_bases;
|
||||
return super.getMaster().getNMappedSites();
|
||||
}
|
||||
|
||||
public long nCalls() {
|
||||
|
|
@ -54,7 +54,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
|
|||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
all_bases++;
|
||||
//all_bases++;
|
||||
|
||||
if (eval == null) // no data here!
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
|
||||
PrintStream perLocusStream = null;
|
||||
|
||||
long nSites = 0;
|
||||
long nMappedSites = 0;
|
||||
|
||||
final String ALL_SNPS = "all";
|
||||
final String SINGLETON_SNPS = "singletons";
|
||||
|
|
@ -79,7 +79,6 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
final String[] SIMPLE_ANALYSIS_NAMES = { ALL_SNPS };
|
||||
String[] ALL_ANALYSIS_NAMES = null;
|
||||
|
||||
|
||||
public void initialize() {
|
||||
ALL_ANALYSIS_NAMES = SIMPLE_ANALYSIS_NAMES;
|
||||
if ( extensiveSubsets )
|
||||
|
|
@ -101,6 +100,11 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
BrokenRODSimulator.attach("dbSNP");
|
||||
}
|
||||
|
||||
|
||||
public long getNMappedSites() {
|
||||
return nMappedSites;
|
||||
}
|
||||
|
||||
private ArrayList<VariantAnalysis> getAnalysisSet(final String name) {
|
||||
return analysisSets.containsKey(name) ? analysisSets.get(name) : null;
|
||||
}
|
||||
|
|
@ -185,49 +189,39 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nSites++;
|
||||
// Iterate over each analysis, and update it
|
||||
Variation eval = (Variation)tracker.lookup("eval", null);
|
||||
nMappedSites++;
|
||||
|
||||
if ( eval != null )
|
||||
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
|
||||
int nBoundGoodRods = tracker.getNBoundRodTracks("interval");
|
||||
if ( nBoundGoodRods > 0 ) {
|
||||
//System.out.printf("%s: n = %d%n", context.getLocation(), nBoundGoodRods );
|
||||
|
||||
// Iterate over each analysis, and update it
|
||||
Variation eval = (Variation)tracker.lookup("eval", null);
|
||||
|
||||
// update stats about all of the SNPs
|
||||
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context);
|
||||
if ( eval != null )
|
||||
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
|
||||
|
||||
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
|
||||
if ( eval != null ) {
|
||||
// if ( ref.getLocus().getStart() >= 10168704 && ref.getLocus().getStop() <= 10168728) System.out.println("###DbSNP from MAP: ");
|
||||
Variation dbsnp = (Variation)BrokenRODSimulator.simulate_lookup("dbSNP",ref.getLocus(),tracker);
|
||||
// if ( ref.getLocus().getStart() >= 10168704 && ref.getLocus().getStop() <= 10168728) System.out.println("###\n");
|
||||
// update stats about all of the SNPs
|
||||
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context);
|
||||
|
||||
// RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("dbSNP",null);
|
||||
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
|
||||
if ( eval != null ) {
|
||||
Variation dbsnp = (Variation)BrokenRODSimulator.simulate_lookup("dbSNP",ref.getLocus(),tracker);
|
||||
|
||||
//
|
||||
//TODO process correctly all the returned dbSNP rods at each location
|
||||
// if ( last_interval.containsP(ref.getLocus()) ) dbsnp = last_rod; // old RODIterator kept returning the same ROD until we completely walk out of it
|
||||
// else {
|
||||
// if ( rods != null && rods.size() > 0 ) dbsnp = (Variation)rods.getRecords().get(0);
|
||||
// if ( dbsnp != null ) {
|
||||
// last_rod = dbsnp;
|
||||
// last_interval = dbsnp.getLocation(); // remember what we just read
|
||||
// }
|
||||
// }
|
||||
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
|
||||
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
|
||||
}
|
||||
|
||||
// Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
|
||||
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
|
||||
// if ( dbsnp != null ) out.println(ref.getLocus()+" DBSNP RECORD "+dbsnp.getLocation());
|
||||
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
|
||||
// are we a population backed call? then update
|
||||
if ( eval instanceof SNPCallFromGenotypes) {
|
||||
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
|
||||
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
|
||||
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
|
||||
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
|
||||
updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
|
||||
}
|
||||
}
|
||||
|
||||
// are we a population backed call? then update
|
||||
if ( eval instanceof SNPCallFromGenotypes) {
|
||||
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
|
||||
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
|
||||
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
|
||||
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
|
||||
updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -270,7 +264,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
Date now = new Date();
|
||||
for ( VariantAnalysis analysis : getAnalysisSet(analysisSetName) ) {
|
||||
String header = getLineHeader(analysisSetName, "summary", analysis.getName());
|
||||
analysis.finalize(nSites);
|
||||
analysis.finalize(getNMappedSites());
|
||||
PrintStream stream = analysis.getSummaryPrintStream();
|
||||
stream.printf("%s%s%n", header, Utils.dupString('-', 78));
|
||||
//stream.printf("%s Analysis set %s%n", analysisSetName, , analysisSetName);
|
||||
|
|
|
|||
Loading…
Reference in New Issue