Minor performance improvements to VariantEval -- now all of the CPU time is spent dealing with the ROD system...

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1772 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-10-06 23:40:30 +00:00
parent 4554ca1b28
commit 8dd0924b37
4 changed files with 54 additions and 41 deletions

View File

@ -63,6 +63,7 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
/** Returns the backing {@code records} list itself (no copy is made here). */
public List<ROD> getRecords() { return records; }
/** Returns the iterator supplied by the backing {@code records} list. */
public Iterator<ROD> iterator() { return records.iterator() ; }
/** Removes every entry from the backing {@code records} list. */
public void clear() { records.clear(); }
/** Returns {@code true} when the backing {@code records} list holds no entries. */
public boolean isEmpty() { return records.isEmpty(); }
public void add(ROD record) {
if ( record != null ) {
if ( ! name.equals(record.getName() ) )

View File

@ -142,6 +142,24 @@ public class RefMetaDataTracker {
return bound;
}
/**
 * Counts the bound ROD tracks without excluding any track by name.
 *
 * @return the result of {@code getNBoundRodTracks(String)} called with a null exclusion
 */
public int getNBoundRodTracks() {
    final String noExclusion = null;
    return getNBoundRodTracks(noExclusion);
}
/**
 * Counts the entries of {@code map} that are non-null and non-empty, optionally
 * leaving out the one track whose name matches the given exclusion.
 *
 * @param excludeIn name of a track to leave out of the count, or {@code null} to count
 *                  every bound track; a non-null value is passed through
 *                  {@code canonicalName} before being compared against track names
 * @return the number of non-null, non-empty record lists that are not excluded
 */
public int getNBoundRodTracks(final String excludeIn ) {
    final String skipName = (excludeIn != null) ? canonicalName(excludeIn) : null;

    int boundCount = 0;
    for ( RODRecordList<ReferenceOrderedDatum> track : map.values() ) {
        // tracks with nothing bound at this position never count
        if ( track == null || track.isEmpty() )
            continue;
        // the name is only inspected when an exclusion was actually requested
        if ( skipName != null && track.getName().equals(skipName) )
            continue;
        boundCount++;
    }
    return boundCount;
}
public Collection<ReferenceOrderedDatum> getBoundRodRecords() {
LinkedList<ReferenceOrderedDatum> bound = new LinkedList<ReferenceOrderedDatum>();

View File

@ -19,7 +19,7 @@ import java.util.List;
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis {
long all_bases = 0;
//long all_bases = 0;
long all_calls = 0;
final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100};
long[] discoverable_bases = new long[thresholds.length];
@ -30,7 +30,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
}
public long nSites() {
return all_bases;
return super.getMaster().getNMappedSites();
}
public long nCalls() {
@ -54,7 +54,7 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
all_bases++;
//all_bases++;
if (eval == null) // no data here!
return null;

View File

@ -67,7 +67,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
PrintStream perLocusStream = null;
long nSites = 0;
long nMappedSites = 0;
final String ALL_SNPS = "all";
final String SINGLETON_SNPS = "singletons";
@ -79,7 +79,6 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
final String[] SIMPLE_ANALYSIS_NAMES = { ALL_SNPS };
String[] ALL_ANALYSIS_NAMES = null;
public void initialize() {
ALL_ANALYSIS_NAMES = SIMPLE_ANALYSIS_NAMES;
if ( extensiveSubsets )
@ -101,6 +100,11 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
BrokenRODSimulator.attach("dbSNP");
}
/**
 * Exposes the walker's {@code nMappedSites} counter.
 *
 * @return the current value of {@code nMappedSites}
 */
public long getNMappedSites() {
    return this.nMappedSites;
}
/**
 * Looks up the list of analyses registered under the given analysis-set name.
 *
 * @param name key to look up in {@code analysisSets}
 * @return the value stored under {@code name}, or {@code null} when
 *         {@code analysisSets} has no such key
 */
private ArrayList<VariantAnalysis> getAnalysisSet(final String name) {
    if ( ! analysisSets.containsKey(name) )
        return null;
    return analysisSets.get(name);
}
@ -185,49 +189,39 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nSites++;
// Iterate over each analysis, and update it
Variation eval = (Variation)tracker.lookup("eval", null);
nMappedSites++;
if ( eval != null )
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
int nBoundGoodRods = tracker.getNBoundRodTracks("interval");
if ( nBoundGoodRods > 0 ) {
//System.out.printf("%s: n = %d%n", context.getLocation(), nBoundGoodRods );
// Iterate over each analysis, and update it
Variation eval = (Variation)tracker.lookup("eval", null);
// update stats about all of the SNPs
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context);
if ( eval != null )
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
if ( eval != null ) {
// if ( ref.getLocus().getStart() >= 10168704 && ref.getLocus().getStop() <= 10168728) System.out.println("###DbSNP from MAP: ");
Variation dbsnp = (Variation)BrokenRODSimulator.simulate_lookup("dbSNP",ref.getLocus(),tracker);
// if ( ref.getLocus().getStart() >= 10168704 && ref.getLocus().getStop() <= 10168728) System.out.println("###\n");
// update stats about all of the SNPs
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context);
// RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("dbSNP",null);
// update the known / novel set by checking whether the knownSNPDBName track has an entry here
if ( eval != null ) {
Variation dbsnp = (Variation)BrokenRODSimulator.simulate_lookup("dbSNP",ref.getLocus(),tracker);
//
//TODO process correctly all the returned dbSNP rods at each location
// if ( last_interval.containsP(ref.getLocus()) ) dbsnp = last_rod; // old RODIterator kept returning the same ROD until we completely walk out of it
// else {
// if ( rods != null && rods.size() > 0 ) dbsnp = (Variation)rods.getRecords().get(0);
// if ( dbsnp != null ) {
// last_rod = dbsnp;
// last_interval = dbsnp.getLocation(); // remember what we just read
// }
// }
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
}
// Variation dbsnp = (Variation)tracker.lookup(knownSNPDBName, null);
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS;
// if ( dbsnp != null ) out.println(ref.getLocus()+" DBSNP RECORD "+dbsnp.getLocation());
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
// are we a population backed call? then update
if ( eval instanceof SNPCallFromGenotypes) {
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
}
}
// are we a population backed call? then update
if ( eval instanceof SNPCallFromGenotypes) {
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
}
return 1;
}
@ -270,7 +264,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
Date now = new Date();
for ( VariantAnalysis analysis : getAnalysisSet(analysisSetName) ) {
String header = getLineHeader(analysisSetName, "summary", analysis.getName());
analysis.finalize(nSites);
analysis.finalize(getNMappedSites());
PrintStream stream = analysis.getSummaryPrintStream();
stream.printf("%s%s%n", header, Utils.dupString('-', 78));
//stream.printf("%s Analysis set %s%n", analysisSetName, , analysisSetName);