From 6f1559bd7798681229926e46697c65598f32961c Mon Sep 17 00:00:00 2001
From: kiran <kiran@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Fri, 15 May 2009 21:22:24 +0000
Subject: [PATCH] Cleaned up a bit.  Added some documentation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@728 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../secondarybase/AnnotateSecondaryBase.java  | 44 +++++++++++++++----
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java
index 537dd105f..ba7494074 100755
--- a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java
+++ b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java
@@ -10,6 +10,20 @@ import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
 
 import java.io.File;
 
+/**
+ * AnnotateSecondaryBase computes the second-best base for every base in an Illumina lane.
+ * First, a statistical model is fit to a subset of the raw Illumina intensities (i.e. those
+ * generated by Illumina's "Firecrest" package).  Then, every read's set of raw intensities
+ * is evaluated against this model to determine the base probability distribution of a given
+ * base observation.
+ *
+ * Approximately 95% of the time, this method and Illumina's basecalling package, "Bustard",
+ * agree on the identity of the best base.  In these cases, we simply annotate the
+ * second-best base.  In cases where this method and Bustard disagree, we annotate the
+ * secondary base as this method's primary base.
+ *
+ * @author Kiran Garimella
+ */
 public class AnnotateSecondaryBase extends CommandLineProgram {
     public static AnnotateSecondaryBase Instance = null;
 
@@ -33,15 +47,16 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
     protected int execute() {
         BasecallingTrainingSet trainingSet = new BasecallingTrainingSet(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END, TRAINING_LIMIT);
 
-        if (SAM_IN == null || !SAM_IN.exists()) {
-            // Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
-            System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
-            trainingSet.loadFirstNUnambiguousReadsTrainingSet();
-        } else {
-            // Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments
-            System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data...");
-            trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE);
-        }
+        /*
+        // This doesn't work right now...
+        // Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments
+        System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data...");
+        trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE);
+        */
+
+        // Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT
+        System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data...");
+        trainingSet.loadFirstNUnambiguousReadsTrainingSet();
 
         // Iterate through the stored training data and add the info to the BasecallingReadModel
         System.out.println("Applying training set...");
@@ -93,6 +108,17 @@ public class AnnotateSecondaryBase extends CommandLineProgram {
         return 0;
     }
 
+    /**
+     * Construct a SAMRecord object with the specified information.  The secondary bases
+     * will be annotated such that they will not conflict with the primary base.
+     *
+     * @param rr          the raw Illumina read
+     * @param fpr         the four-base distributions for every base in the read
+     * @param sfh         the SAM header
+     * @param runBarcode  the run barcode of the lane (used to prefix the reads)
+     *
+     * @return a fully-constructed SAM record
+     */
     private SAMRecord constructSAMRecord(RawRead rr, FourProbRead fpr, SAMFileHeader sfh, String runBarcode) {
         SAMRecord sr = new SAMRecord(sfh);