Indels supported. Variable gap penalties are not yet taken into account.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1720 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-09-24 21:03:02 +00:00
parent d2af26e81f
commit 08e8d2183a
5 changed files with 133 additions and 43 deletions

View File

@ -7,21 +7,29 @@ package org.broadinstitute.sting.alignment;
* @version 0.1
*/
public interface Alignment extends Comparable<Alignment> {
/**
* Gets the starting position for the given alignment.
* @return Starting position.
*/
public int getAlignmentStart();
/**
* Is the given alignment on the reverse strand?
* @return True if the alignment is on the reverse strand.
*/
public boolean isNegativeStrand();
/**
* Gets the starting position for the given alignment.
* @return Starting position.
*/
public int getAlignmentStart();
/**
* Gets the score of this alignment.
* @return The score.
*/
public int getScore();
/**
* Temporary getters.
* @return
*/
public int getMismatches();
public int getGapOpens();
public int getGapExtensions();
}

View File

@ -12,8 +12,6 @@ import java.util.List;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
/**
* A test harness to ensure that the perfect aligner works.
@ -51,19 +49,21 @@ public class AlignerTestHarness {
for(SAMRecord read: reader) {
count++;
//if( count > 500 ) break;
//if( count != 39 /*&& count != 15*/ ) continue;
//if( count > 5000 ) break;
//if( count != 10959 ) continue;
//if( !read.getReadName().endsWith("1507:1636#0") )
// continue;
boolean skipRead = false;
/*
for( CigarElement cigarElement: read.getCigar().getCigarElements() ) {
if( cigarElement.getOperator() != CigarOperator.M ) {
System.out.printf("Skipping read %s because it features indels%n", read.getReadName());
skipRead = true;
}
}
*/
if(read.getReadString().indexOf("N") >= 0) {
System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName());
@ -74,11 +74,12 @@ public class AlignerTestHarness {
List<Alignment> alignments = aligner.align(read);
if(alignments.size() == 0 )
throw new StingException(String.format("Unable to align read %s to reference.",read.getReadName()));
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches.%n", read.getReadName(), alignments.get(0).getAlignmentStart(), alignments.get(0).getScore());
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
Alignment alignment = alignments.get(0);
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());

View File

@ -0,0 +1,13 @@
package org.broadinstitute.sting.alignment.bwa;
/**
* The current state of an alignment.
*
* @author mhanna
* @version 0.1
*/
public enum AlignmentState {
MATCH_MISMATCH,
INSERTION,
DELETION
}

View File

@ -48,22 +48,7 @@ public class BWAAligner implements Aligner {
/**
* Maximum number of gap extensions (-e option from original BWA).
*/
private static final int MAXIMUM_GAP_EXTENSIONS = -1;
/**
* Penalty for straight mismatches (-M option from original BWA).
*/
private static final int MISMATCH_PENALTY = 3;
/**
* Penalty for gap opens (-O option from original BWA).
*/
private static final int GAP_OPEN_PENALTY = 11;
/**
* Penalty for gap extensions (-E option from original BWA).
*/
private static final int GAP_EXTENSION_PENALTY = 4;
private static final int MAXIMUM_GAP_EXTENSIONS = 6;
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) {
forwardBWT = new BWTReader(forwardBWTFile).read();
@ -91,6 +76,7 @@ public class BWAAligner implements Aligner {
initial.position = 0;
initial.loBound = 0;
initial.hiBound = forwardBWT.length();
initial.state = AlignmentState.MATCH_MISMATCH;
initial.mismatches = 0;
BWAAlignment initialReverse = new BWAAlignment();
@ -98,6 +84,7 @@ public class BWAAligner implements Aligner {
initialReverse.position = 0;
initialReverse.loBound = 0;
initialReverse.hiBound = reverseBWT.length();
initialReverse.state = AlignmentState.MATCH_MISMATCH;
initialReverse.mismatches = 0;
alignments.add(initial);
@ -112,32 +99,103 @@ public class BWAAligner implements Aligner {
// Done with this particular alignment.
if(alignment.position == read.getReadLength()-1) {
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()) + 1;
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1;
return Collections.<Alignment>singletonList(alignment);
}
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position).value);
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value);
// if z < D(i) then return {}
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches;
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches - alignment.gapOpens;
if( mismatches < lowerBounds.get(alignment.position+1).value )
continue;
if( alignment.mismatches > MAXIMUM_EDIT_DISTANCE )
continue;
// For each base in { A, C, G, T }
if( alignment.state == AlignmentState.MATCH_MISMATCH ) {
if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) {
// Add a potential insertion.
BWAAlignment newAlignment = new BWAAlignment();
newAlignment.negativeStrand = alignment.negativeStrand;
newAlignment.position = alignment.position + 1;
newAlignment.state = AlignmentState.INSERTION;
newAlignment.gapOpens = alignment.gapOpens + 1;
newAlignment.gapExtensions = alignment.gapExtensions;
newAlignment.loBound = alignment.loBound;
newAlignment.hiBound = alignment.hiBound;
alignments.add(newAlignment);
// Add a potential deletion by marking a deletion and augmenting the position.
for(Base base: EnumSet.allOf(Base.class)) {
newAlignment = new BWAAlignment();
newAlignment.negativeStrand = alignment.negativeStrand;
newAlignment.position = alignment.position;
newAlignment.state = AlignmentState.DELETION;
newAlignment.gapOpens = alignment.gapOpens + 1;
newAlignment.gapExtensions = alignment.gapExtensions;
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
if( newAlignment.loBound <= newAlignment.hiBound )
alignments.add(newAlignment);
}
}
}
else if( alignment.state == AlignmentState.INSERTION ) {
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
// Add a potential insertion extension.
BWAAlignment newAlignment = new BWAAlignment();
newAlignment.negativeStrand = alignment.negativeStrand;
newAlignment.position = alignment.position + 1;
newAlignment.state = AlignmentState.INSERTION;
newAlignment.gapOpens = alignment.gapOpens;
newAlignment.gapExtensions = alignment.gapExtensions+1;
newAlignment.loBound = alignment.loBound;
newAlignment.hiBound = alignment.hiBound;
if( newAlignment.loBound <= newAlignment.hiBound )
alignments.add(newAlignment);
}
}
else if( alignment.state == AlignmentState.DELETION ) {
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
// Add a potential deletion by marking a deletion and augmenting the position.
for(Base base: EnumSet.allOf(Base.class)) {
BWAAlignment newAlignment = new BWAAlignment();
newAlignment.negativeStrand = alignment.negativeStrand;
newAlignment.position = alignment.position;
newAlignment.state = AlignmentState.DELETION;
newAlignment.gapOpens = alignment.gapOpens;
newAlignment.gapExtensions = alignment.gapExtensions + 1;
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
alignments.add(newAlignment);
}
}
}
// Mismatches
for(Base base: EnumSet.allOf(Base.class)) {
// Create and initialize a new alignment, given that base as the candidate.
BWAAlignment newAlignment = new BWAAlignment();
newAlignment.negativeStrand = alignment.negativeStrand;
newAlignment.position = alignment.position + 1;
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
newAlignment.state = AlignmentState.MATCH_MISMATCH;
newAlignment.mismatches = alignment.mismatches;
if( base.toASCII() != bases[newAlignment.position] )
newAlignment.mismatches++;
newAlignment.gapOpens = alignment.gapOpens;
newAlignment.gapExtensions = alignment.gapExtensions;
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
// If this alignment is valid, add it to the list.
if( newAlignment.loBound <= newAlignment.hiBound )

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.bwt.BWT;
import net.sf.samtools.SAMRecord;
/**
* An alignment object to be used incrementally as the BWA aligner
@ -12,8 +10,6 @@ import net.sf.samtools.SAMRecord;
* @version 0.1
*/
public class BWAAlignment implements Alignment {
enum State { MATCH, INSERTION, DELETION }
/**
* Start of the final alignment.
*/
@ -34,6 +30,16 @@ public class BWAAlignment implements Alignment {
*/
protected int mismatches;
/**
* Number of gap opens in alignment.
*/
protected int gapOpens;
/**
* Number of gap extensions in alignment.
*/
protected int gapExtensions;
/**
* Working variable. The lower bound of the alignment within the BWT.
*/
@ -47,7 +53,7 @@ public class BWAAlignment implements Alignment {
/**
* Indicates the current state of an alignment. Are we in an insertion? Deletion?
*/
protected State alignmentState;
protected AlignmentState state;
/**
* Gets the starting position for the given alignment.
@ -70,9 +76,13 @@ public class BWAAlignment implements Alignment {
* @return BWA-style scores. 0 is best.
*/
public int getScore() {
return mismatches;
return mismatches + gapOpens + gapExtensions;
}
public int getMismatches() { return mismatches; }
public int getGapOpens() { return gapOpens; }
public int getGapExtensions() { return gapExtensions; }
/**
* Compare this alignment to another alignment.
* @param other Other alignment to which to compare.
@ -88,6 +98,6 @@ public class BWAAlignment implements Alignment {
}
public String toString() {
return String.format("position = %d, mismatches = %d, loBound = %d, hiBound = %d, score = %d", position, mismatches, loBound, hiBound, getScore());
return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d", position, state, mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore());
}
}