A single class that can be handed reads for training and basecalling. When in training mode, we accumulate no more than 10000 reads and always replace the lowest-quality reads with superior quality reads. Thus, the training set always contains 10000 of the best reads available. After training is complete, the class can be interrogated to return the SQ tag for a given RawRead object.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-06-30 16:03:15 +00:00
parent 74cc7136f7
commit e3cdf7ef4b
1 changed files with 55 additions and 0 deletions

View File

@ -0,0 +1,55 @@
package org.broadinstitute.sting.secondarybase;
import org.broadinstitute.sting.utils.containers.BoundedScoringSet;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Collects training reads, builds a {@link BasecallingReadModel} from them, and then
 * uses that model to basecall reads and produce SQ tag values.
 *
 * <p>Usage: call {@link #addTrainingRead(RawRead)} for each candidate training read,
 * call {@link #doneTraining()} once to build the model, then call
 * {@link #getFourProbRead(RawRead)} or {@link #getSqTagValue(RawRead)}.
 *
 * <p>Training reads are held in a {@link BoundedScoringSet} created with a limit of
 * {@code TRAINING_LIMIT}; which reads are retained once the limit is reached is decided
 * by that container (presumably the best-scoring ones -- confirm against
 * BoundedScoringSet).
 */
public class SecondaryBaseAnnotator {
    /** Limit passed to the bounded container that holds the training reads. */
    private static final int TRAINING_LIMIT = 10000;

    /** Set to true by {@link #doneTraining()} once the model has been built. */
    private boolean trained;

    /** Accumulates the reads that will be handed to the model at training time. */
    private final BoundedScoringSet<RawRead> trainingAggregator;

    /**
     * The model built by {@link #doneTraining()}; {@code null} until then.
     * Kept public because existing callers may access it directly.
     */
    public BasecallingReadModel model;

    /** Creates an untrained annotator with an empty training set. */
    public SecondaryBaseAnnotator() {
        trained = false;
        trainingAggregator = new BoundedScoringSet<RawRead>(TRAINING_LIMIT);
    }

    /**
     * Offers a read to the training set.
     *
     * @param rawRead the read to add to the training aggregator
     */
    public void addTrainingRead(RawRead rawRead) {
        trainingAggregator.add(rawRead);
    }

    /**
     * Placeholder: not yet implemented and currently always reports {@code false}.
     *
     * @return {@code false}
     */
    public boolean haveEnoughTrainingReads() {
        return false;
    }

    /**
     * Builds the basecalling model from the reads accumulated so far and marks this
     * annotator as trained.
     */
    public void doneTraining() {
        // Snapshot the aggregator into a plain list for the model constructor.
        ArrayList<RawRead> trainingData =
                new ArrayList<RawRead>(Arrays.asList(trainingAggregator.toArray(new RawRead[0])));

        model = new BasecallingReadModel(trainingData);
        trained = true;
    }

    /**
     * Basecalls a read with the trained model.
     *
     * @param rawRead the read to call
     * @return the result of {@code model.call(rawRead)}
     * @throws StingException if no model is available yet (previously this surfaced
     *         as a bare NullPointerException)
     */
    public FourProbRead getFourProbRead(RawRead rawRead) {
        // Guard on the model itself rather than the 'trained' flag: 'model' is a public
        // field, so a caller may legitimately have assigned it directly.
        if (model == null) {
            throw new StingException(untrainedMessage("getFourProbRead"));
        }

        return model.call(rawRead);
    }

    /**
     * Basecalls a read and returns its SQ tag value.
     *
     * @param rawRead the read to call
     * @return the value of {@code getSQTag(rawRead)} on the called read
     * @throws StingException if {@link #doneTraining()} has not been called, or no
     *         model is available
     */
    public byte[] getSqTagValue(RawRead rawRead) {
        if (!trained || model == null) {
            throw new StingException(untrainedMessage("getSqTagValue"));
        }

        FourProbRead fpr = model.call(rawRead);
        return fpr.getSQTag(rawRead);
    }

    /** Builds the error message used when a method is called before a model exists. */
    private static String untrainedMessage(String methodName) {
        return "Model must be trained via addTrainingRead() before " + methodName + "() can be called";
    }

    /** Placeholder; not yet implemented and not called anywhere in this class. */
    private void train() {}

    /** Placeholder; not yet implemented (returns {@code null}) and not called in this class. */
    private byte[] getSQTag(FourProbRead fourProbRead, RawRead rawRead) { return null; }

    /** Placeholder; not yet implemented (returns {@code false}) and not called in this class. */
    private static boolean isGoodTrainingRead(RawRead rawRead) { return false; }

    /** Placeholder; not yet implemented (returns {@code 0.0}) and not called in this class. */
    private static double getAverageQualityScore(RawRead rawRead) { return 0.0; }
}