From 5824dea0c1460429eef4e7cf9ae998e2ddb3ff40 Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 14 May 2009 16:57:00 +0000 Subject: [PATCH] Trains and calls a read at a time rather than a base at a time (which, given it's name, it should have done in the first place) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@705 348d0f76-0448-11de-a6fe-93d51630548a --- .../secondarybase/BasecallingReadModel.java | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java b/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java index 283137674..4d0054248 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java +++ b/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.secondarybase.BasecallingBaseModel; import org.broadinstitute.sting.secondarybase.FourProb; import java.io.File; +import java.util.ArrayList; /** * BasecallingReadModel represents the statistical models for @@ -36,6 +37,20 @@ public class BasecallingReadModel { } } + public void train(BasecallingTrainingSet trainingSet) { + ArrayList trainingData = trainingSet.getTrainingData(); + + for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) { + RawRead read = trainingData.get(readIndex); + addMeanPoints(read); + } + + for (int readIndex = 0; readIndex < trainingData.size(); readIndex++) { + RawRead read = trainingData.get(readIndex); + addCovariancePoints(read); + } + } + /** * Add a single training point to the model means. * @@ -46,6 +61,27 @@ public class BasecallingReadModel { basemodels[cycle].addMeanPoint(probMatrix, fourintensity); } + public void addMeanPoints(RawRead read) { + byte[] seqs = read.getSequence(); + byte[] quals = read.getQuals(); + short[][] ints = read.getIntensities(); + + for (int cycle = 0; cycle < seqs.length; cycle++) { + char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]); + char baseCur = (char) seqs[cycle]; + double probCur = QualityUtils.qualToProb(quals[cycle]); + + double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur); + + double[] fourIntensity = new double[4]; + for (int channel = 0; channel < 4; channel++) { + fourIntensity[channel] = (double) ints[cycle][channel]; + } + + basemodels[cycle].addMeanPoint(probMatrix, fourIntensity); + } + } + /** * Add a single training point to the model covariances. * @@ -56,6 +92,27 @@ public class BasecallingReadModel { basemodels[cycle].addCovariancePoint(probMatrix, fourintensity); } + public void addCovariancePoints(RawRead read) { + byte[] seqs = read.getSequence(); + byte[] quals = read.getQuals(); + short[][] ints = read.getIntensities(); + + for (int cycle = 0; cycle < seqs.length; cycle++) { + char basePrev = (char) ((cycle == 0) ? '.' : seqs[cycle - 1]); + char baseCur = (char) seqs[cycle]; + double probCur = QualityUtils.qualToProb(quals[cycle]); + + double[][] probMatrix = getBaseProbabilityMatrix(cycle, basePrev, baseCur, probCur); + + double[] fourIntensity = new double[4]; + for (int channel = 0; channel < 4; channel++) { + fourIntensity[channel] = (double) ints[cycle][channel]; + } + + basemodels[cycle].addCovariancePoint(probMatrix, fourIntensity); + } + } + /** * Compute the likelihood matrix for a given cycle. * @@ -107,6 +164,26 @@ public class BasecallingReadModel { return fp; } + public FourProbRead call(RawRead read) { + FourProbRead fpr = new FourProbRead(read.getReadLength()); + + for (int cycle = 0; cycle < read.getReadLength(); cycle++) { + char basePrev = (char) ((cycle == 0) ? '.' : read.getSequence()[cycle - 1]); + byte qualPrev = ((cycle == 0) ? 0 : read.getQuals()[cycle - 1]); + + double[] fourIntensity = new double[4]; + for (int channel = 0; channel < 4; channel++) { + fourIntensity[channel] = (double) read.getIntensities()[cycle][channel]; + } + + //fps[cycle] = computeProbabilities(cycle, basePrev, qualPrev, fourIntensity); + fpr.add(cycle, computeProbabilities(cycle, basePrev, qualPrev, fourIntensity)); + + } + + return fpr; + } + /** * Returns the base probability matrix *