From e3cdf7ef4bff65d404a8352ba7ae2a65b13c8b4d Mon Sep 17 00:00:00 2001 From: kiran Date: Tue, 30 Jun 2009 16:03:15 +0000 Subject: [PATCH] A single class that can be handed reads for training and basecalling. When in training mode, we accumulate no more than 10000 reads and always replace the lowest-quality reads with superior quality reads. Thus, the training set always contains 10000 of the best reads available. After training is complete, the class can be interrogated to return the SQ tag for a given RawRead object. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1125 348d0f76-0448-11de-a6fe-93d51630548a --- .../secondarybase/SecondaryBaseAnnotator.java | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/secondarybase/SecondaryBaseAnnotator.java diff --git a/java/src/org/broadinstitute/sting/secondarybase/SecondaryBaseAnnotator.java b/java/src/org/broadinstitute/sting/secondarybase/SecondaryBaseAnnotator.java new file mode 100755 index 000000000..24473c0b8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/secondarybase/SecondaryBaseAnnotator.java @@ -0,0 +1,55 @@ +package org.broadinstitute.sting.secondarybase; + +import org.broadinstitute.sting.utils.containers.BoundedScoringSet; +import org.broadinstitute.sting.utils.StingException; + +import java.util.ArrayList; +import java.util.Arrays; + +public class SecondaryBaseAnnotator { + private static final int TRAINING_LIMIT = 10000; + private boolean trained; + private final BoundedScoringSet trainingAggregator; + public BasecallingReadModel model; + + public SecondaryBaseAnnotator() { + trained = false; + trainingAggregator = new BoundedScoringSet(TRAINING_LIMIT); + } + + public void addTrainingRead(RawRead rawRead) { trainingAggregator.add(rawRead); } + + public boolean haveEnoughTrainingReads() { return false; } + + public void doneTraining() { + ArrayList trainingData = new ArrayList(trainingAggregator.size()); + + trainingData.addAll(Arrays.asList(trainingAggregator.toArray(new RawRead[0]))); + + model = new BasecallingReadModel(trainingData); + + trained = true; + } + + public FourProbRead getFourProbRead(RawRead rawRead) { + return model.call(rawRead); + } + + public byte[] getSqTagValue(RawRead rawRead) { + if (!trained) { + throw new StingException("Model must be trained via addTrainingRead() before getSqTagValue() can be called"); + } + + FourProbRead fpr = model.call(rawRead); + + return fpr.getSQTag(rawRead); + } + + private void train() {} + + private byte[] getSQTag(FourProbRead fourProbRead, RawRead rawRead) { return null; } + + private static boolean isGoodTrainingRead(RawRead rawRead) { return false; } + + private static double getAverageQualityScore(RawRead rawRead) { return 0.0; } +}