diff --git a/java/src/org/broadinstitute/sting/secondarybase/AbstractFirecrestFileParser.java b/java/src/org/broadinstitute/sting/secondarybase/AbstractFirecrestFileParser.java index e947d446b..80d42682a 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/AbstractFirecrestFileParser.java +++ b/java/src/org/broadinstitute/sting/secondarybase/AbstractFirecrestFileParser.java @@ -17,10 +17,6 @@ import java.io.File; import java.io.FilenameFilter; import java.util.*; -import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.secondarybase.FirecrestReadData; -import org.broadinstitute.sting.secondarybase.FirecrestFilenameComparator; - /** * Abstract base class for implementing parsers for various versions of Firecrest output */ diff --git a/java/src/org/broadinstitute/sting/secondarybase/AddFourProbsToSAM.java b/java/src/org/broadinstitute/sting/secondarybase/AddFourProbsToSAM.java index c9b0e47db..de0544e9e 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/AddFourProbsToSAM.java +++ b/java/src/org/broadinstitute/sting/secondarybase/AddFourProbsToSAM.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.playground.fourbasecaller; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.Argument; +import net.sf.samtools.*; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import java.io.File; import java.util.HashMap; -import net.sf.samtools.*; - public class AddFourProbsToSAM extends CommandLineProgram { public static AddFourProbsToSAM Instance = null; diff --git a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java index 04aba3f40..a0fe74454 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java +++ b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java @@ 
-1,18 +1,14 @@ package org.broadinstitute.sting.secondarybase; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.secondarybase.BasecallingReadModel; - -import java.io.File; -import java.util.HashMap; -import java.util.Vector; - -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; + +import java.io.File; public class AnnotateSecondaryBase extends CommandLineProgram { public static AnnotateSecondaryBase Instance = null; @@ -58,10 +54,10 @@ public class AnnotateSecondaryBase extends CommandLineProgram { SAMFileWriter sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, false, SAM_OUT); IlluminaParser iparser = new IlluminaParser(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END); + RawRead rr; int basesConsistent = 0, basesTotal = 0; - RawRead rr; while ((rr = iparser.next()) != null) { FourProbRead fpr = model.call(rr); @@ -87,10 +83,9 @@ public class AnnotateSecondaryBase extends CommandLineProgram { } iparser.close(); - sfw.close(); - System.out.println("Done."); + System.out.println("\nDone."); return 0; } diff --git a/java/src/org/broadinstitute/sting/secondarybase/BasecallingBaseModel.java b/java/src/org/broadinstitute/sting/secondarybase/BasecallingBaseModel.java index 0199a7562..9ce8a31d7 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/BasecallingBaseModel.java +++ b/java/src/org/broadinstitute/sting/secondarybase/BasecallingBaseModel.java @@ -1,14 +1,15 @@ package org.broadinstitute.sting.secondarybase; -import cern.colt.matrix.DoubleMatrix1D; import cern.colt.matrix.DoubleFactory1D; 
-import cern.colt.matrix.DoubleMatrix2D; import cern.colt.matrix.DoubleFactory2D; +import cern.colt.matrix.DoubleMatrix1D; +import cern.colt.matrix.DoubleMatrix2D; import cern.colt.matrix.linalg.Algebra; - import org.broadinstitute.sting.utils.BaseUtils; -import java.io.*; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; /** * BasecallingBaseModel is a class that represents the statistical diff --git a/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java b/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java index 4d0054248..2e52ae94b 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java +++ b/java/src/org/broadinstitute/sting/secondarybase/BasecallingReadModel.java @@ -2,8 +2,6 @@ package org.broadinstitute.sting.secondarybase; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.secondarybase.BasecallingBaseModel; -import org.broadinstitute.sting.secondarybase.FourProb; import java.io.File; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainingSet.java b/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainingSet.java index c58ab2384..c0ae20ca8 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainingSet.java +++ b/java/src/org/broadinstitute/sting/secondarybase/BasecallingTrainingSet.java @@ -1,20 +1,24 @@ package org.broadinstitute.sting.secondarybase; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; - -import java.util.HashMap; -import java.util.Vector; -import java.util.ArrayList; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.File; - -import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; import org.broadinstitute.sting.utils.BaseUtils; 
import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2; +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Vector; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * BasecallingTrainingSet holds a set of raw read sequences, their raw intensities, and quality scores. + * + * @author Kiran Garimella + */ public class BasecallingTrainingSet { private File bustardDir; private int lane; @@ -24,6 +28,15 @@ public class BasecallingTrainingSet { private ArrayList trainingData; + /** + * Constructor for BasecallingTrainingSet. + * + * @param bustardDir the Bustard directory for the sample + * @param lane the lane for the sample + * @param cycleBegin the start cycle for the beginning of the read (0-based, inclusive) + * @param cycleEnd the stop cycle for the end of the read (0-based, inclusive) + * @param trainingLimit the number of training reads to accept + */ public BasecallingTrainingSet(File bustardDir, int lane, int cycleBegin, int cycleEnd, int trainingLimit) { this.bustardDir = bustardDir; this.lane = lane; @@ -32,14 +45,27 @@ public class BasecallingTrainingSet { this.trainingLimit = trainingLimit; } + /** + * Get the training data array list + * + * @return the arraylist of raw training reads + */ public ArrayList getTrainingData() { return this.trainingData; } + /** + * Set the training data array list + * + * @param trainingData the arraylist of raw training reads + */ public void setTrainingData(ArrayList trainingData) { this.trainingData = trainingData; } + /** + * Take the first N reads that have no ambiguous bases and add them to the training set. 
+ */ public void loadFirstNUnambiguousReadsTrainingSet() { this.trainingData = new ArrayList(trainingLimit); @@ -65,12 +91,25 @@ public class BasecallingTrainingSet { } } + /** + * Load a training set from perfect reads in an already-aligned bam file + * + * @param samIn the SAM/BAM file to load the reads from + * @param reference the reference to which the reads should be compared + */ public void loadPreAlignedTrainingSet(File samIn, File reference) { Vector< HashMap > trainingReads = getPerfectAlignmentsByTile(samIn, reference); trainingData = correlateReadsAndIntensities(trainingReads); } + /** + * Find perfect reads and group them by tile. + * + * @param samIn the SAM/BAM file to load the reads from + * @param reference the reference to which the reads should be compared + * @return a vector of perfect reads, grouped by tile + */ private Vector> getPerfectAlignmentsByTile(File samIn, File reference) { FastaSequenceFile2 ref = new FastaSequenceFile2(reference); String currentContig = "none"; @@ -132,6 +171,12 @@ public class BasecallingTrainingSet { return trainingReads; } + /** + * Correlate the perfect reads with their raw intensities. Sloooooooow. 
+ * + * @param trainingReads the perfect reads, grouped by tile + * @return a training set of raw sequence, intensities, and quality scores (all set to 40 for these perfect bases) + */ private ArrayList correlateReadsAndIntensities(Vector> trainingReads) { ArrayList newTrainingData = new ArrayList(trainingLimit); diff --git a/java/src/org/broadinstitute/sting/secondarybase/CombineSamAndFourProbs.java b/java/src/org/broadinstitute/sting/secondarybase/CombineSamAndFourProbs.java index 7f8ec73e8..d2805337a 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/CombineSamAndFourProbs.java +++ b/java/src/org/broadinstitute/sting/secondarybase/CombineSamAndFourProbs.java @@ -1,16 +1,18 @@ package org.broadinstitute.sting.secondarybase; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.QualityUtils; - -import java.io.*; -import java.util.HashMap; - import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; public class CombineSamAndFourProbs extends CommandLineProgram { public static CombineSamAndFourProbs Instance = null; diff --git a/java/src/org/broadinstitute/sting/secondarybase/FirecrestFileParser.java b/java/src/org/broadinstitute/sting/secondarybase/FirecrestFileParser.java index 6c5fdc0d7..9bece319d 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/FirecrestFileParser.java +++ b/java/src/org/broadinstitute/sting/secondarybase/FirecrestFileParser.java @@ -9,15 +9,12 @@ */ package 
org.broadinstitute.sting.secondarybase; -import edu.mit.broad.picard.util.PasteParser; -import edu.mit.broad.picard.util.FormatUtil; -import edu.mit.broad.picard.util.BasicTextFileParser; import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.BasicTextFileParser; +import edu.mit.broad.picard.util.FormatUtil; import java.io.File; -import org.broadinstitute.sting.secondarybase.FirecrestReadData; - /** * Class to parse the data in an Illumina Firecrest directory and return an iterator over that data, in order * by tile. diff --git a/java/src/org/broadinstitute/sting/secondarybase/FourBaseRecaller.java b/java/src/org/broadinstitute/sting/secondarybase/FourBaseRecaller.java index 8780af858..6f9f2749b 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/FourBaseRecaller.java +++ b/java/src/org/broadinstitute/sting/secondarybase/FourBaseRecaller.java @@ -1,20 +1,17 @@ package org.broadinstitute.sting.secondarybase; -import edu.mit.broad.picard.illumina.BustardFileParser; -import edu.mit.broad.picard.illumina.BustardReadData; import edu.mit.broad.picard.illumina.AbstractBustardFileParser; +import edu.mit.broad.picard.illumina.BustardFileParser; import edu.mit.broad.picard.illumina.BustardFileParser_1_1; +import edu.mit.broad.picard.illumina.BustardReadData; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.secondarybase.FirecrestFileParser; -import org.broadinstitute.sting.secondarybase.FirecrestReadData; -import org.broadinstitute.sting.secondarybase.FourProb; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import 
java.io.File; import java.io.IOException; diff --git a/java/src/org/broadinstitute/sting/secondarybase/FourIntensity.java b/java/src/org/broadinstitute/sting/secondarybase/FourIntensity.java index f6d88f12c..064031f00 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/FourIntensity.java +++ b/java/src/org/broadinstitute/sting/secondarybase/FourIntensity.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.secondarybase; -import java.util.StringTokenizer; - public class FourIntensity { private float[] fIntensities; diff --git a/java/src/org/broadinstitute/sting/secondarybase/FourProb.java b/java/src/org/broadinstitute/sting/secondarybase/FourProb.java index dbc0259f3..93ec78794 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/FourProb.java +++ b/java/src/org/broadinstitute/sting/secondarybase/FourProb.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.secondarybase; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; /** * FourProb represents four base hypotheses, their probabilities, and the ranking among one another. diff --git a/java/src/org/broadinstitute/sting/secondarybase/FourProbRead.java b/java/src/org/broadinstitute/sting/secondarybase/FourProbRead.java index 6b9fb09f0..40ae72c67 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/FourProbRead.java +++ b/java/src/org/broadinstitute/sting/secondarybase/FourProbRead.java @@ -5,11 +5,25 @@ import org.broadinstitute.sting.utils.QualityUtils; import java.util.ArrayList; +/** + * FourProbRead contains the four-prob information for each base in a read. 
+ */ public class FourProbRead extends ArrayList { + /** + * Initialize the container with the specified capacity + * + * @param initialCapacity the number of bases in the read + */ public FourProbRead(int initialCapacity) { super(initialCapacity); } + /** + * Get the read sequence at a specified rank + * + * @param rank the rank of the sequence to return (0=best, 1=second-best, 2=third-best, 3=fourth-best) + * @return the read sequence at the specified rank + */ public String getBaseSequenceAtGivenRank(int rank) { String pseq = ""; @@ -22,14 +36,28 @@ public class FourProbRead extends ArrayList { return pseq; } + /** + * Get the primary read sequence + * @return the primary read sequence + */ public String getPrimaryBaseSequence() { return getBaseSequenceAtGivenRank(0); } + /** + * Get the secondary read sequence + * @return the secondary read sequence + */ public String getSecondaryBaseSequence() { return getBaseSequenceAtGivenRank(1); } + /** + * Get the SAM spec-conformant SQ tag that will be written to the SAM/BAM file. + * + * @param rr the raw read + * @return the byte array for the SQ tag (first two bits: the base identity, the last six bits: -10*log10(p3/p2)). 
+ */ public byte[] getSQTag(RawRead rr) { byte[] sqtag = new byte[this.size()]; diff --git a/java/src/org/broadinstitute/sting/secondarybase/IlluminaParser.java b/java/src/org/broadinstitute/sting/secondarybase/IlluminaParser.java index 224f568c2..1fefd4656 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/IlluminaParser.java +++ b/java/src/org/broadinstitute/sting/secondarybase/IlluminaParser.java @@ -1,19 +1,17 @@ package org.broadinstitute.sting.secondarybase; +import edu.mit.broad.picard.util.BasicTextFileParser; +import edu.mit.broad.picard.util.PasteParser; import org.broadinstitute.sting.utils.StingException; +import java.io.Closeable; import java.io.File; import java.io.FilenameFilter; -import java.io.Closeable; -import java.io.IOException; -import java.util.regex.Pattern; -import java.util.regex.Matcher; import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; - -import edu.mit.broad.picard.util.PasteParser; -import edu.mit.broad.picard.util.BasicTextFileParser; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class IlluminaParser implements Iterator, Iterable, Closeable { private File bustardDir; diff --git a/java/src/org/broadinstitute/sting/secondarybase/MatchSQTagToStrand.java b/java/src/org/broadinstitute/sting/secondarybase/MatchSQTagToStrand.java index 467ac5705..6789b0fcc 100644 --- a/java/src/org/broadinstitute/sting/secondarybase/MatchSQTagToStrand.java +++ b/java/src/org/broadinstitute/sting/secondarybase/MatchSQTagToStrand.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.secondarybase; -import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.QualityUtils; - -import java.io.File; - import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMRecord; +import 
org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; + +import java.io.File; public class MatchSQTagToStrand extends CommandLineProgram { public static MatchSQTagToStrand Instance = null; diff --git a/java/src/org/broadinstitute/sting/secondarybase/RawRead.java b/java/src/org/broadinstitute/sting/secondarybase/RawRead.java index 903888cd2..9df4dea79 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/RawRead.java +++ b/java/src/org/broadinstitute/sting/secondarybase/RawRead.java @@ -1,7 +1,10 @@ package org.broadinstitute.sting.secondarybase; -import org.broadinstitute.sting.utils.BaseUtils; - +/** + * RawRead represents lane and tile coordinates, raw intensities, read bases, and quality scores + * + * @author Kiran Garimella + */ public class RawRead { private byte lane; private short tile; @@ -12,6 +15,14 @@ public class RawRead { private byte[] quals; private short[][] intensities; + /** + * Construct a raw read from the output of a PasteParser (in the order of int, seq, prb). + * Takes data within specified cycle ranges. + * + * @param pastedReadString the 3x(fragment length) output array from the PasteParser. 
+ * @param cycleBegin the start cycle for the read (0-based, inclusive) + * @param cycleEnd the end cycle for the read (0-based, inclusive) + */ public RawRead(String[][] pastedReadString, int cycleBegin, int cycleEnd) { lane = Byte.valueOf(pastedReadString[0][0]); tile = Short.valueOf(pastedReadString[0][1]); @@ -43,25 +54,96 @@ public class RawRead { } } + /** + * Get lane number of read + * + * @return lane number of read + */ public byte getLane() { return lane; } + + /** + * Get tile number of read + * + * @return tile number of read + */ public int getTile() { return tile; } + + /** + * Get x-coordinate of read + * + * @return x-coordinate of read + */ public int getXCoordinate() { return x; } + + /** + * Get y-coordinate of read + * + * @return y-coordinate of read + */ public int getYCoordinate() { return y; } + /** + * Get read key (lane:tile:x:y) + * + * @return read key (lane:tile:x:y) + */ public String getReadKey() { return String.format("%d:%d:%d:%d", lane, tile, x, y); } + /** + * Get the read sequence between the cycles specified in the constructor as a byte array + * + * @return read sequence + */ public byte[] getSequence() { return sequence; } + + /** + * Set the read sequence from a byte array + * + * @param sequence the read sequence in byte array form + */ public void setSequence(byte[] sequence) { this.sequence = sequence; } + /** + * Get the read sequence as a string + * + * @return the read sequence in string form + */ public String getSequenceAsString() { return new String(getSequence()); } + /** + * Get the quals + * + * @return a byte array of quals + */ public byte[] getQuals() { return quals; } + + /** + * Set the quals + * + * @param quals a byte array of quals + */ public void setQuals(byte[] quals) { this.quals = quals; } + /** + * Get the raw read intensities + * + * @return the (readLength)x(numChannels) array of raw intensities + */ public short[][] getIntensities() { return intensities; } + + /** + * Set the raw intensities + * 
+ * @param intensities the (readLength)x(numChannels) array of raw intensities + */ public void setIntensities(short[][] intensities) { this.intensities = intensities; } + /** + * Get the read length + * + * @return the read length + */ public int getReadLength() { return sequence.length; } }