Renamed to BasecallingTrainer.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@752 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
01a3cb27c7
commit
08c9f4d86b
|
|
@ -19,11 +19,9 @@ import java.util.regex.Pattern;
|
||||||
*
|
*
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class BasecallingTrainingSet {
|
public class BasecallingTrainer {
|
||||||
private File bustardDir;
|
private File bustardDir;
|
||||||
private int lane;
|
private int lane;
|
||||||
private int cycleBegin;
|
|
||||||
private int cycleEnd;
|
|
||||||
private int trainingLimit;
|
private int trainingLimit;
|
||||||
|
|
||||||
private ArrayList<RawRead> trainingData;
|
private ArrayList<RawRead> trainingData;
|
||||||
|
|
@ -33,15 +31,11 @@ public class BasecallingTrainingSet {
|
||||||
*
|
*
|
||||||
* @param bustardDir the Bustard directory for the sample
|
* @param bustardDir the Bustard directory for the sample
|
||||||
* @param lane the lane for the sample
|
* @param lane the lane for the sample
|
||||||
* @param cycleBegin the start cycle for the beginning of the read (0-based, inclusive)
|
|
||||||
* @param cycleEnd the stop cycle for the end of the read (0-based, inclusive)
|
|
||||||
* @param trainingLimit the number of training reads to accept
|
* @param trainingLimit the number of training reads to accept
|
||||||
*/
|
*/
|
||||||
public BasecallingTrainingSet(File bustardDir, int lane, int cycleBegin, int cycleEnd, int trainingLimit) {
|
public BasecallingTrainer(File bustardDir, int lane, int trainingLimit) {
|
||||||
this.bustardDir = bustardDir;
|
this.bustardDir = bustardDir;
|
||||||
this.lane = lane;
|
this.lane = lane;
|
||||||
this.cycleBegin = cycleBegin;
|
|
||||||
this.cycleEnd = cycleEnd;
|
|
||||||
this.trainingLimit = trainingLimit;
|
this.trainingLimit = trainingLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,12 +63,14 @@ public class BasecallingTrainingSet {
|
||||||
public void loadFirstNUnambiguousReadsTrainingSet() {
|
public void loadFirstNUnambiguousReadsTrainingSet() {
|
||||||
this.trainingData = new ArrayList<RawRead>(trainingLimit);
|
this.trainingData = new ArrayList<RawRead>(trainingLimit);
|
||||||
|
|
||||||
IlluminaParser iparser = new IlluminaParser(bustardDir, lane, cycleBegin, cycleEnd);
|
IlluminaParser iparser = new IlluminaParser(bustardDir, lane);
|
||||||
|
|
||||||
RawRead rawread;
|
RawRead rawread;
|
||||||
int numreads = 0;
|
int numreads = 0;
|
||||||
|
|
||||||
while (numreads < trainingLimit && (rawread = iparser.next()) != null) {
|
while (numreads < trainingLimit && iparser.next()) {
|
||||||
|
rawread = iparser.getRawRead();
|
||||||
|
|
||||||
int numAmbiguous = 0;
|
int numAmbiguous = 0;
|
||||||
byte[] sequence = rawread.getSequence();
|
byte[] sequence = rawread.getSequence();
|
||||||
|
|
||||||
|
|
@ -171,16 +167,10 @@ public class BasecallingTrainingSet {
|
||||||
return trainingReads;
|
return trainingReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Correlate the perfect reads with their raw intensities. Sloooooooow.
|
|
||||||
*
|
|
||||||
* @param trainingReads the perfect reads, grouped by tile
|
|
||||||
* @return a training set of raw sequence, intensities, and quality scores (all set to 40 for these perfect bases)
|
|
||||||
*/
|
|
||||||
private ArrayList<RawRead> correlateReadsAndIntensities(Vector<HashMap<String, SAMRecord>> trainingReads) {
|
private ArrayList<RawRead> correlateReadsAndIntensities(Vector<HashMap<String, SAMRecord>> trainingReads) {
|
||||||
ArrayList<RawRead> newTrainingData = new ArrayList<RawRead>(trainingLimit);
|
ArrayList<RawRead> newTrainingData = new ArrayList<RawRead>(trainingLimit);
|
||||||
|
|
||||||
IlluminaParser iparser = new IlluminaParser(bustardDir, lane, cycleBegin, cycleEnd);
|
IlluminaParser iparser = new IlluminaParser(bustardDir, lane);
|
||||||
|
|
||||||
int totalReadCount = 0;
|
int totalReadCount = 0;
|
||||||
|
|
||||||
|
|
@ -190,7 +180,8 @@ public class BasecallingTrainingSet {
|
||||||
int tileReadCount = 0;
|
int tileReadCount = 0;
|
||||||
|
|
||||||
RawRead iread;
|
RawRead iread;
|
||||||
while (trainingReads.get(tile) != null && tileReadCount < trainingReads.get(tile).size() && (iread = iparser.next()) != null) {
|
while (trainingReads.get(tile) != null && tileReadCount < trainingReads.get(tile).size() && iparser.next()) {
|
||||||
|
iread = iparser.getRawRead();
|
||||||
String readKey = iread.getReadKey();
|
String readKey = iread.getReadKey();
|
||||||
|
|
||||||
if (trainingReads.get(tile).containsKey(readKey)) {
|
if (trainingReads.get(tile).containsKey(readKey)) {
|
||||||
Loading…
Reference in New Issue