Factored out some simple stats accumulation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@733 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
81fac73c01
commit
5d60efc498
|
|
@ -47,13 +47,6 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
|
||||||
protected int execute() {
|
protected int execute() {
|
||||||
BasecallingTrainingSet trainingSet = new BasecallingTrainingSet(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END, TRAINING_LIMIT);
|
BasecallingTrainingSet trainingSet = new BasecallingTrainingSet(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END, TRAINING_LIMIT);
|
||||||
|
|
||||||
/*
|
|
||||||
// This doesn't work right now...
|
|
||||||
// Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments
|
|
||||||
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data...");
|
|
||||||
trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE);
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
|
// Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
|
||||||
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
|
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
|
||||||
trainingSet.loadFirstNUnambiguousReadsTrainingSet();
|
trainingSet.loadFirstNUnambiguousReadsTrainingSet();
|
||||||
|
|
@ -68,41 +61,21 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
|
||||||
SAMFileHeader sfh = new SAMFileHeader();
|
SAMFileHeader sfh = new SAMFileHeader();
|
||||||
SAMFileWriter sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, false, SAM_OUT);
|
SAMFileWriter sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, false, SAM_OUT);
|
||||||
|
|
||||||
|
BasecallingStats bstats = new BasecallingStats();
|
||||||
IlluminaParser iparser = new IlluminaParser(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END);
|
IlluminaParser iparser = new IlluminaParser(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END);
|
||||||
|
|
||||||
RawRead rr;
|
RawRead rr;
|
||||||
|
while (bstats.getReadsTotal() < CALLING_LIMIT && (rr = iparser.next()) != null) {
|
||||||
int basesConsistent = 0, basesTotal = 0, readsTotal = 0;
|
|
||||||
|
|
||||||
while (readsTotal < CALLING_LIMIT && (rr = iparser.next()) != null) {
|
|
||||||
FourProbRead fpr = model.call(rr);
|
FourProbRead fpr = model.call(rr);
|
||||||
|
|
||||||
SAMRecord sr = constructSAMRecord(rr, fpr, sfh, RUN_BARCODE);
|
sfw.addAlignment(constructSAMRecord(rr, fpr, sfh, RUN_BARCODE));
|
||||||
sfw.addAlignment(sr);
|
bstats.update(rr, fpr, 10000);
|
||||||
|
|
||||||
for (int cycle = 0; cycle < fpr.size(); cycle++) {
|
|
||||||
int rawBaseIndex = BaseUtils.simpleBaseToBaseIndex((char) rr.getSequence()[cycle]);
|
|
||||||
int fpBaseIndex = fpr.get(cycle).indexAtRank(0);
|
|
||||||
|
|
||||||
if (rawBaseIndex >= 0 && fpBaseIndex >= 0) {
|
|
||||||
basesTotal++;
|
|
||||||
|
|
||||||
if (rawBaseIndex == fpBaseIndex) {
|
|
||||||
basesConsistent++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (basesTotal % 10000 == 0 && basesTotal > 0) {
|
|
||||||
System.out.printf("%% bases consistent: %d/%d (%4.4f)\r", basesConsistent, basesTotal, ((double) basesConsistent)/((double) basesTotal));
|
|
||||||
}
|
|
||||||
|
|
||||||
readsTotal++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
iparser.close();
|
iparser.close();
|
||||||
sfw.close();
|
sfw.close();
|
||||||
|
|
||||||
System.out.printf("%% bases consistent: %d/%d (%4.4f)\n", basesConsistent, basesTotal, ((double) basesConsistent)/((double) basesTotal));
|
System.out.printf("%% bases consistent: %d/%d (%4.4f)\n", bstats.getBasesConsistent(), bstats.getBasesTotal(), bstats.getPercentConsistent());
|
||||||
System.out.println("Done.");
|
System.out.println("Done.");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.broadinstitute.sting.secondarybase;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* BasecallingStats is a utility class to aggregate and emit basecalling
|
||||||
|
* stats (total bases seen and consistency between basecalling methods).
|
||||||
|
*
|
||||||
|
* @author Kiran Garimella
|
||||||
|
*/
|
||||||
|
public class BasecallingStats {
|
||||||
|
private int basesConsistent = 0;
|
||||||
|
private int basesTotal = 0;
|
||||||
|
private int readsTotal = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor that does nothing.
|
||||||
|
*/
|
||||||
|
public BasecallingStats() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of bases called identically by two different methods.
|
||||||
|
*
|
||||||
|
* @return the number of consistent bases.
|
||||||
|
*/
|
||||||
|
public int getBasesConsistent() {
|
||||||
|
return basesConsistent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the total number of bases seen.
|
||||||
|
*
|
||||||
|
* @return the total number of bases seen.
|
||||||
|
*/
|
||||||
|
public int getBasesTotal() {
|
||||||
|
return basesTotal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the total number of reads seen.
|
||||||
|
*
|
||||||
|
* @return the total number of reads seen.
|
||||||
|
*/
|
||||||
|
public int getReadsTotal() {
|
||||||
|
return readsTotal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the percent of bases called consistently by two different methods.
|
||||||
|
*
|
||||||
|
* @return the percent of bases called consistently
|
||||||
|
*/
|
||||||
|
public double getPercentConsistent() {
|
||||||
|
return ((double) getBasesConsistent())/((double) getReadsTotal());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the number of bases seen, the number of reads seen, and the number of consistent bases.
|
||||||
|
*
|
||||||
|
* @param rr the raw Illumina read
|
||||||
|
* @param fpr the FourProb read
|
||||||
|
*/
|
||||||
|
public void update(RawRead rr, FourProbRead fpr, int updateInterval) {
|
||||||
|
for (int cycle = 0; cycle < fpr.size(); cycle++) {
|
||||||
|
int rawBaseIndex = BaseUtils.simpleBaseToBaseIndex((char) rr.getSequence()[cycle]);
|
||||||
|
int fpBaseIndex = fpr.get(cycle).indexAtRank(0);
|
||||||
|
|
||||||
|
if (rawBaseIndex >= 0 && fpBaseIndex >= 0) {
|
||||||
|
basesTotal++;
|
||||||
|
|
||||||
|
if (rawBaseIndex == fpBaseIndex) {
|
||||||
|
basesConsistent++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (basesTotal % updateInterval == 0 && basesTotal > 0) {
|
||||||
|
System.out.printf("%% bases consistent: %d/%d (%4.4f)\r", basesConsistent, basesTotal, ((double) basesConsistent)/((double) basesTotal));
|
||||||
|
}
|
||||||
|
|
||||||
|
readsTotal++;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue