Cleaned up a bit. Added some documentation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@728 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2c4de7b5c5
commit
6f1559bd77
|
|
@ -10,6 +10,20 @@ import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AnnotateSecondaryBase computes the second-best base for every base in an Illumina lane.
|
||||||
|
* First, a statistical model is fit to a subset of the raw Illumina intensities (i.e. those
|
||||||
|
* generated by Illumina's "Firecrest" package). Then, every read's set of raw intensities
|
||||||
|
* is evaluated against this model to determine the base probability distribution of a given
|
||||||
|
* base observation.
|
||||||
|
*
|
||||||
|
* Approximately 95% of the time, this method and Illumina's basecalling package, "Bustard",
|
||||||
|
* agree on the identity of the best base. In these cases, we simply annotate the
|
||||||
|
* second-best base. In cases where this method and Bustard disagree, we annotate the
|
||||||
|
* secondary base as this method's primary base.
|
||||||
|
*
|
||||||
|
* @author Kiran Garimella
|
||||||
|
*/
|
||||||
public class AnnotateSecondaryBase extends CommandLineProgram {
|
public class AnnotateSecondaryBase extends CommandLineProgram {
|
||||||
public static AnnotateSecondaryBase Instance = null;
|
public static AnnotateSecondaryBase Instance = null;
|
||||||
|
|
||||||
|
|
@ -33,15 +47,16 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
|
||||||
protected int execute() {
|
protected int execute() {
|
||||||
BasecallingTrainingSet trainingSet = new BasecallingTrainingSet(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END, TRAINING_LIMIT);
|
BasecallingTrainingSet trainingSet = new BasecallingTrainingSet(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END, TRAINING_LIMIT);
|
||||||
|
|
||||||
if (SAM_IN == null || !SAM_IN.exists()) {
|
/*
|
||||||
// Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
|
// This doesn't work right now...
|
||||||
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
|
// Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments
|
||||||
trainingSet.loadFirstNUnambiguousReadsTrainingSet();
|
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data...");
|
||||||
} else {
|
trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE);
|
||||||
// Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments
|
*/
|
||||||
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data...");
|
|
||||||
trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE);
|
// Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
|
||||||
}
|
System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
|
||||||
|
trainingSet.loadFirstNUnambiguousReadsTrainingSet();
|
||||||
|
|
||||||
// Iterate through the stored training data and add the info to the BasecallingReadModel
|
// Iterate through the stored training data and add the info to the BasecallingReadModel
|
||||||
System.out.println("Applying training set...");
|
System.out.println("Applying training set...");
|
||||||
|
|
@ -93,6 +108,17 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a SAMRecord object with the specified information. The secondary bases
|
||||||
|
* will be annotated such that they will not conflict with the primary base.
|
||||||
|
*
|
||||||
|
* @param rr the raw Illumina read
|
||||||
|
* @param fpr the four-base distributions for every base in the read
|
||||||
|
* @param sfh the SAM header
|
||||||
|
* @param runBarcode the run barcode of the lane (used to prefix the reads)
|
||||||
|
*
|
||||||
|
* @return a fully-constructed SAM record
|
||||||
|
*/
|
||||||
private SAMRecord constructSAMRecord(RawRead rr, FourProbRead fpr, SAMFileHeader sfh, String runBarcode) {
|
private SAMRecord constructSAMRecord(RawRead rr, FourProbRead fpr, SAMFileHeader sfh, String runBarcode) {
|
||||||
SAMRecord sr = new SAMRecord(sfh);
|
SAMRecord sr = new SAMRecord(sfh);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue