Can optionally process raw or corrected intensities.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@349 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6cdad10dd1
commit
b7a2e82b46
|
|
@ -7,6 +7,10 @@ import java.io.PrintWriter;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.playground.illumina.FirecrestFileParser;
|
||||||
|
import org.broadinstitute.sting.playground.illumina.FirecrestReadData;
|
||||||
|
import org.broadinstitute.sting.playground.illumina.FourIntensity;
|
||||||
|
import org.broadinstitute.sting.playground.illumina.IlluminaParser;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileWriter;
|
import net.sf.samtools.SAMFileWriter;
|
||||||
import net.sf.samtools.SAMFileWriterFactory;
|
import net.sf.samtools.SAMFileWriterFactory;
|
||||||
|
|
@ -23,6 +27,7 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
public int END = 0;
|
public int END = 0;
|
||||||
public int TRAINING_LIMIT = 1000000000;
|
public int TRAINING_LIMIT = 1000000000;
|
||||||
public int CALLING_LIMIT = 1000000000;
|
public int CALLING_LIMIT = 1000000000;
|
||||||
|
public Boolean RAW = false;
|
||||||
|
|
||||||
public static void main(String[] argv) {
|
public static void main(String[] argv) {
|
||||||
Instance = new FourBaseRecaller();
|
Instance = new FourBaseRecaller();
|
||||||
|
|
@ -36,11 +41,15 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
m_parser.addOptionalArg("end", "E", "End of read to process (0 = whole read, i.e. unpaired; 1 = first end; 2 = second end)", "END");
|
m_parser.addOptionalArg("end", "E", "End of read to process (0 = whole read, i.e. unpaired; 1 = first end; 2 = second end)", "END");
|
||||||
m_parser.addOptionalArg("tlim", "T", "Number of reads to use for parameter initialization", "TRAINING_LIMIT");
|
m_parser.addOptionalArg("tlim", "T", "Number of reads to use for parameter initialization", "TRAINING_LIMIT");
|
||||||
m_parser.addOptionalArg("clim", "C", "Number of reads to basecall", "CALLING_LIMIT");
|
m_parser.addOptionalArg("clim", "C", "Number of reads to basecall", "CALLING_LIMIT");
|
||||||
|
m_parser.addOptionalFlag("raw", "R", "Use raw intensities?", "RAW");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int execute() {
|
protected int execute() {
|
||||||
boolean isPaired = (END > 0);
|
boolean isPaired = (END > 0);
|
||||||
|
|
||||||
|
//IlluminaParser ip = new IlluminaParser(DIR, LANE, 0);
|
||||||
|
//System.exit(1);
|
||||||
|
|
||||||
// Set up debugging paths
|
// Set up debugging paths
|
||||||
File debugdir = new File(OUT.getPath() + ".debug/");
|
File debugdir = new File(OUT.getPath() + ".debug/");
|
||||||
debugdir.mkdir();
|
debugdir.mkdir();
|
||||||
|
|
@ -53,57 +62,68 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
BustardFileParser bfp;
|
BustardFileParser bfp;
|
||||||
BustardReadData bread;
|
BustardReadData bread;
|
||||||
|
|
||||||
|
FirecrestFileParser ffp;
|
||||||
|
FirecrestReadData fread;
|
||||||
|
|
||||||
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
||||||
bread = bfp.next();
|
bread = bfp.next();
|
||||||
|
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
|
||||||
|
fread = ffp.next();
|
||||||
|
|
||||||
int cycle_offset = (END <= 1) ? 0 : bread.getIntensities().length/2;
|
int cycle_offset = (END <= 1) ? 0 : bread.getIntensities().length/2;
|
||||||
BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length());
|
BasecallingReadModel model = new BasecallingReadModel(bread.getFirstReadSequence().length());
|
||||||
int queryid;
|
int queryid;
|
||||||
|
|
||||||
// learn mean parameters
|
// learn mean parameters
|
||||||
if (debugout != null) { debugout.println("intensity int_a int_c int_g int_t base"); }
|
if (debugout != null) { debugout.println("intensity cycle int_a int_c int_g int_t base"); }
|
||||||
|
|
||||||
queryid = 0;
|
queryid = 0;
|
||||||
do {
|
do {
|
||||||
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
||||||
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
||||||
double[][] intensities = bread.getIntensities();
|
double[][] intensities = bread.getIntensities();
|
||||||
|
double[][] rawintensities = fread.getIntensities();
|
||||||
|
|
||||||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||||
char baseCur = bases.charAt(cycle);
|
char baseCur = bases.charAt(cycle);
|
||||||
byte qualCur = quals[cycle];
|
byte qualCur = quals[cycle];
|
||||||
double[] fourintensity = intensities[cycle + cycle_offset];
|
double[] fourintensity = (RAW) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||||
|
|
||||||
if (debugout != null && cycle == 0) {
|
if (debugout != null && cycle >= 31 && cycle <= 33) {
|
||||||
debugout.println("intensity " + intensities[0][0] + " " + intensities[0][1] + " " + intensities[0][2] + " " + intensities[0][3] + " " + baseCur);
|
debugout.println("intensity " + cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + baseCur);
|
||||||
}
|
}
|
||||||
|
|
||||||
model.addMeanPoint(cycle, baseCur, qualCur, fourintensity);
|
model.addMeanPoint(cycle, baseCur, qualCur, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryid++;
|
queryid++;
|
||||||
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null);
|
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null && (fread = ffp.next()) != null);
|
||||||
|
|
||||||
|
debugout.close();
|
||||||
|
|
||||||
// learn covariance parameters
|
// learn covariance parameters
|
||||||
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
||||||
bread = bfp.next();
|
bread = bfp.next();
|
||||||
|
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
|
||||||
|
fread = ffp.next();
|
||||||
|
|
||||||
queryid = 0;
|
queryid = 0;
|
||||||
do {
|
do {
|
||||||
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
||||||
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
||||||
double[][] intensities = bread.getIntensities();
|
double[][] intensities = bread.getIntensities();
|
||||||
|
double[][] rawintensities = fread.getIntensities();
|
||||||
|
|
||||||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||||
char baseCur = bases.charAt(cycle);
|
char baseCur = bases.charAt(cycle);
|
||||||
byte qualCur = quals[cycle];
|
byte qualCur = quals[cycle];
|
||||||
double[] fourintensity = intensities[cycle + cycle_offset];
|
double[] fourintensity = (RAW) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||||
|
|
||||||
model.addCovariancePoint(cycle, baseCur, qualCur, fourintensity);
|
model.addCovariancePoint(cycle, baseCur, qualCur, fourintensity);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryid++;
|
queryid++;
|
||||||
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null);
|
} while (queryid < TRAINING_LIMIT && bfp.hasNext() && (bread = bfp.next()) != null && (fread = ffp.next()) != null);
|
||||||
|
|
||||||
// write debugging info
|
// write debugging info
|
||||||
model.write(debugdir);
|
model.write(debugdir);
|
||||||
|
|
@ -114,26 +134,25 @@ public class FourBaseRecaller extends CommandLineProgram {
|
||||||
|
|
||||||
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
bfp = new BustardFileParser(DIR, LANE, isPaired, "FB");
|
||||||
bread = bfp.next();
|
bread = bfp.next();
|
||||||
|
ffp = new FirecrestFileParser(DIR.getParentFile(), LANE);
|
||||||
|
fread = ffp.next();
|
||||||
|
|
||||||
queryid = 0;
|
queryid = 0;
|
||||||
do {
|
do {
|
||||||
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
String bases = (END <= 1) ? bread.getFirstReadSequence() : bread.getSecondReadSequence();
|
||||||
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
byte[] quals = (END <= 1) ? bread.getFirstReadPhredBinaryQualities() : bread.getSecondReadPhredBinaryQualities();
|
||||||
double[][] intensities = bread.getIntensities();
|
double[][] intensities = bread.getIntensities();
|
||||||
|
double[][] rawintensities = fread.getIntensities();
|
||||||
|
|
||||||
byte[] asciiseq = new byte[bases.length()];
|
byte[] asciiseq = new byte[bases.length()];
|
||||||
byte[] bestqual = new byte[bases.length()];
|
byte[] bestqual = new byte[bases.length()];
|
||||||
byte[] nextbestqual = new byte[bases.length()];
|
byte[] nextbestqual = new byte[bases.length()];
|
||||||
|
|
||||||
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
for (int cycle = 0; cycle < bases.length(); cycle++) {
|
||||||
double[] fourintensity = intensities[cycle + cycle_offset];
|
double[] fourintensity = (RAW) ? rawintensities[cycle + cycle_offset] : intensities[cycle + cycle_offset];
|
||||||
|
|
||||||
FourProb fp = model.computeProbabilities(cycle, fourintensity);
|
FourProb fp = model.computeProbabilities(cycle, fourintensity);
|
||||||
|
|
||||||
//if (cycle == 0) {
|
|
||||||
// System.out.println("result " + intensities[0][0] + " " + intensities[0][1] + " " + intensities[0][2] + " " + intensities[0][3] + " " + bases.charAt(0) + " " + fp.toString());
|
|
||||||
//}
|
|
||||||
|
|
||||||
asciiseq[cycle] = (byte) fp.baseAtRank(0);
|
asciiseq[cycle] = (byte) fp.baseAtRank(0);
|
||||||
bestqual[cycle] = fp.qualAtRank(0);
|
bestqual[cycle] = fp.qualAtRank(0);
|
||||||
nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1));
|
nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue