diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index 7e70abab1..9ca765c5c 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -7,21 +7,29 @@ package org.broadinstitute.sting.alignment; * @version 0.1 */ public interface Alignment extends Comparable { - /** - * Gets the starting position for the given alignment. - * @return Starting position. - */ - public int getAlignmentStart(); - /** * Is the given alignment on the reverse strand? * @return True if the alignment is on the reverse strand. */ public boolean isNegativeStrand(); + /** + * Gets the starting position for the given alignment. + * @return Starting position. + */ + public int getAlignmentStart(); + /** * Gets the score of this alignment. * @return The score. */ public int getScore(); + + /** + * Temporary getters. + * @return + */ + public int getMismatches(); + public int getGapOpens(); + public int getGapExtensions(); } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index afce86bb9..4e11e4e2c 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -12,8 +12,6 @@ import java.util.List; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileReader; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; /** * A test harness to ensure that the perfect aligner works. @@ -51,19 +49,21 @@ public class AlignerTestHarness { for(SAMRecord read: reader) { count++; - //if( count > 500 ) break; - //if( count != 39 /*&& count != 15*/ ) continue; + //if( count > 5000 ) break; + //if( count != 10959 ) continue; //if( !read.getReadName().endsWith("1507:1636#0") ) // continue; boolean skipRead = false; + /* for( CigarElement cigarElement: read.getCigar().getCigarElements() ) { if( cigarElement.getOperator() != CigarOperator.M ) { System.out.printf("Skipping read %s because it features indels%n", read.getReadName()); skipRead = true; } } + */ if(read.getReadString().indexOf("N") >= 0) { System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName()); @@ -74,11 +74,12 @@ public class AlignerTestHarness { List alignments = aligner.align(read); if(alignments.size() == 0 ) - throw new StingException(String.format("Unable to align read %s to reference.",read.getReadName())); - - //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches.%n", read.getReadName(), alignments.get(0).getAlignmentStart(), alignments.get(0).getScore()); + throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); Alignment alignment = alignments.get(0); + + System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions()); + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) { IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases()); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentState.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentState.java new file mode 100644 index 000000000..050a44b97 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentState.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.alignment.bwa; + +/** + * The current state of an alignment. + * + * @author mhanna + * @version 0.1 + */ +public enum AlignmentState { + MATCH_MISMATCH, + INSERTION, + DELETION +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 788bb08a3..a9ddead7f 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -48,22 +48,7 @@ public class BWAAligner implements Aligner { /** * Maximum number of gap extensions (-e option from original BWA). */ - private static final int MAXIMUM_GAP_EXTENSIONS = -1; - - /** - * Penalty for straight mismatches (-M option from original BWA). - */ - private static final int MISMATCH_PENALTY = 3; - - /** - * Penalty for gap opens (-O option from original BWA). - */ - private static final int GAP_OPEN_PENALTY = 11; - - /** - * Penalty for gap extensions (-E option from original BWA). - */ - private static final int GAP_EXTENSION_PENALTY = 4; + private static final int MAXIMUM_GAP_EXTENSIONS = 6; public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) { forwardBWT = new BWTReader(forwardBWTFile).read(); @@ -91,6 +76,7 @@ public class BWAAligner implements Aligner { initial.position = 0; initial.loBound = 0; initial.hiBound = forwardBWT.length(); + initial.state = AlignmentState.MATCH_MISMATCH; initial.mismatches = 0; BWAAlignment initialReverse = new BWAAlignment(); @@ -98,6 +84,7 @@ public class BWAAligner implements Aligner { initialReverse.position = 0; initialReverse.loBound = 0; initialReverse.hiBound = reverseBWT.length(); + initialReverse.state = AlignmentState.MATCH_MISMATCH; initialReverse.mismatches = 0; alignments.add(initial); @@ -112,32 +99,103 @@ public class BWAAligner implements Aligner { // Done with this particular alignment. if(alignment.position == read.getReadLength()-1) { - alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()) + 1; + alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1; return Collections.singletonList(alignment); } - //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position).value); + //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value); // if z < D(i) then return {} - int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches; + int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches - alignment.gapOpens; if( mismatches < lowerBounds.get(alignment.position+1).value ) continue; if( alignment.mismatches > MAXIMUM_EDIT_DISTANCE ) continue; - // For each base in { A, C, G, T } + if( alignment.state == AlignmentState.MATCH_MISMATCH ) { + if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) { + // Add a potential insertion. + BWAAlignment newAlignment = new BWAAlignment(); + newAlignment.negativeStrand = alignment.negativeStrand; + newAlignment.position = alignment.position + 1; + newAlignment.state = AlignmentState.INSERTION; + newAlignment.gapOpens = alignment.gapOpens + 1; + newAlignment.gapExtensions = alignment.gapExtensions; + + newAlignment.loBound = alignment.loBound; + newAlignment.hiBound = alignment.hiBound; + + alignments.add(newAlignment); + + // Add a potential deletion by marking a deletion and augmenting the position. + for(Base base: EnumSet.allOf(Base.class)) { + newAlignment = new BWAAlignment(); + newAlignment.negativeStrand = alignment.negativeStrand; + newAlignment.position = alignment.position; + newAlignment.state = AlignmentState.DELETION; + newAlignment.gapOpens = alignment.gapOpens + 1; + newAlignment.gapExtensions = alignment.gapExtensions; + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + if( newAlignment.loBound <= newAlignment.hiBound ) + alignments.add(newAlignment); + } + } + } + else if( alignment.state == AlignmentState.INSERTION ) { + if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) { + // Add a potential insertion extension. + BWAAlignment newAlignment = new BWAAlignment(); + newAlignment.negativeStrand = alignment.negativeStrand; + newAlignment.position = alignment.position + 1; + newAlignment.state = AlignmentState.INSERTION; + newAlignment.gapOpens = alignment.gapOpens; + newAlignment.gapExtensions = alignment.gapExtensions+1; + + newAlignment.loBound = alignment.loBound; + newAlignment.hiBound = alignment.hiBound; + + if( newAlignment.loBound <= newAlignment.hiBound ) + alignments.add(newAlignment); + } + } + else if( alignment.state == AlignmentState.DELETION ) { + if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) { + // Add a potential deletion by marking a deletion and augmenting the position. + for(Base base: EnumSet.allOf(Base.class)) { + BWAAlignment newAlignment = new BWAAlignment(); + newAlignment.negativeStrand = alignment.negativeStrand; + newAlignment.position = alignment.position; + newAlignment.state = AlignmentState.DELETION; + newAlignment.gapOpens = alignment.gapOpens; + newAlignment.gapExtensions = alignment.gapExtensions + 1; + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + + alignments.add(newAlignment); + } + } + } + + // Mismatches for(Base base: EnumSet.allOf(Base.class)) { // Create and initialize a new alignment, given that base as the candidate. BWAAlignment newAlignment = new BWAAlignment(); newAlignment.negativeStrand = alignment.negativeStrand; newAlignment.position = alignment.position + 1; - - newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; - newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + newAlignment.state = AlignmentState.MATCH_MISMATCH; newAlignment.mismatches = alignment.mismatches; if( base.toASCII() != bases[newAlignment.position] ) newAlignment.mismatches++; + newAlignment.gapOpens = alignment.gapOpens; + newAlignment.gapExtensions = alignment.gapExtensions; + + newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); // If this alignment is valid, add it to the list. if( newAlignment.loBound <= newAlignment.hiBound ) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java index bcb7c0cce..ed3c21681 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.alignment.bwa; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.sting.alignment.bwa.bwt.BWT; -import net.sf.samtools.SAMRecord; /** * An alignment object to be used incrementally as the BWA aligner @@ -12,8 +10,6 @@ import net.sf.samtools.SAMRecord; * @version 0.1 */ public class BWAAlignment implements Alignment { - enum State { MATCH, INSERTION, DELETION } - /** * Start of the final alignment. */ @@ -34,6 +30,16 @@ public class BWAAlignment implements Alignment { */ protected int mismatches; + /** + * Number of gap opens in alignment. + */ + protected int gapOpens; + + /** + * Number of gap extensions in alignment. + */ + protected int gapExtensions; + /** * Working variable. The lower bound of the alignment within the BWT. */ @@ -47,7 +53,7 @@ public class BWAAlignment implements Alignment { /** * Indicates the current state of an alignment. Are we in an insertion? Deletion? */ - protected State alignmentState; + protected AlignmentState state; /** * Gets the starting position for the given alignment. @@ -70,9 +76,13 @@ public class BWAAlignment implements Alignment { * @return BWA-style scores. 0 is best. */ public int getScore() { - return mismatches; + return mismatches + gapOpens + gapExtensions; } + public int getMismatches() { return mismatches; } + public int getGapOpens() { return gapOpens; } + public int getGapExtensions() { return gapExtensions; } + /** * Compare this alignment to another alignment. * @param other Other alignment to which to compare. @@ -88,6 +98,6 @@ public class BWAAlignment implements Alignment { } public String toString() { - return String.format("position = %d, mismatches = %d, loBound = %d, hiBound = %d, score = %d", position, mismatches, loBound, hiBound, getScore()); + return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d", position, state, mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore()); } }