Indels supported. Variable gap penalties are not yet taken into account.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1720 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d2af26e81f
commit
08e8d2183a
|
|
@ -7,21 +7,29 @@ package org.broadinstitute.sting.alignment;
|
|||
* @version 0.1
|
||||
*/
|
||||
public interface Alignment extends Comparable<Alignment> {
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public int getAlignmentStart();
|
||||
|
||||
/**
|
||||
* Is the given alignment on the reverse strand?
|
||||
* @return True if the alignment is on the reverse strand.
|
||||
*/
|
||||
public boolean isNegativeStrand();
|
||||
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public int getAlignmentStart();
|
||||
|
||||
/**
|
||||
* Gets the score of this alignment.
|
||||
* @return The score.
|
||||
*/
|
||||
public int getScore();
|
||||
|
||||
/**
|
||||
* Temporary getters.
|
||||
* @return
|
||||
*/
|
||||
public int getMismatches();
|
||||
public int getGapOpens();
|
||||
public int getGapExtensions();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,8 +12,6 @@ import java.util.List;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
||||
/**
|
||||
* A test harness to ensure that the perfect aligner works.
|
||||
|
|
@ -51,19 +49,21 @@ public class AlignerTestHarness {
|
|||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
//if( count > 500 ) break;
|
||||
//if( count != 39 /*&& count != 15*/ ) continue;
|
||||
//if( count > 5000 ) break;
|
||||
//if( count != 10959 ) continue;
|
||||
//if( !read.getReadName().endsWith("1507:1636#0") )
|
||||
// continue;
|
||||
|
||||
boolean skipRead = false;
|
||||
|
||||
/*
|
||||
for( CigarElement cigarElement: read.getCigar().getCigarElements() ) {
|
||||
if( cigarElement.getOperator() != CigarOperator.M ) {
|
||||
System.out.printf("Skipping read %s because it features indels%n", read.getReadName());
|
||||
skipRead = true;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
if(read.getReadString().indexOf("N") >= 0) {
|
||||
System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName());
|
||||
|
|
@ -74,11 +74,12 @@ public class AlignerTestHarness {
|
|||
|
||||
List<Alignment> alignments = aligner.align(read);
|
||||
if(alignments.size() == 0 )
|
||||
throw new StingException(String.format("Unable to align read %s to reference.",read.getReadName()));
|
||||
|
||||
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches.%n", read.getReadName(), alignments.get(0).getAlignmentStart(), alignments.get(0).getScore());
|
||||
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||
|
||||
Alignment alignment = alignments.get(0);
|
||||
|
||||
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
||||
|
||||
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,13 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
/**
|
||||
* The current state of an alignment.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public enum AlignmentState {
|
||||
MATCH_MISMATCH,
|
||||
INSERTION,
|
||||
DELETION
|
||||
}
|
||||
|
|
@ -48,22 +48,7 @@ public class BWAAligner implements Aligner {
|
|||
/**
|
||||
* Maximum number of gap extensions (-e option from original BWA).
|
||||
*/
|
||||
private static final int MAXIMUM_GAP_EXTENSIONS = -1;
|
||||
|
||||
/**
|
||||
* Penalty for straight mismatches (-M option from original BWA).
|
||||
*/
|
||||
private static final int MISMATCH_PENALTY = 3;
|
||||
|
||||
/**
|
||||
* Penalty for gap opens (-O option from original BWA).
|
||||
*/
|
||||
private static final int GAP_OPEN_PENALTY = 11;
|
||||
|
||||
/**
|
||||
* Penalty for gap extensions (-E option from original BWA).
|
||||
*/
|
||||
private static final int GAP_EXTENSION_PENALTY = 4;
|
||||
private static final int MAXIMUM_GAP_EXTENSIONS = 6;
|
||||
|
||||
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) {
|
||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||
|
|
@ -91,6 +76,7 @@ public class BWAAligner implements Aligner {
|
|||
initial.position = 0;
|
||||
initial.loBound = 0;
|
||||
initial.hiBound = forwardBWT.length();
|
||||
initial.state = AlignmentState.MATCH_MISMATCH;
|
||||
initial.mismatches = 0;
|
||||
|
||||
BWAAlignment initialReverse = new BWAAlignment();
|
||||
|
|
@ -98,6 +84,7 @@ public class BWAAligner implements Aligner {
|
|||
initialReverse.position = 0;
|
||||
initialReverse.loBound = 0;
|
||||
initialReverse.hiBound = reverseBWT.length();
|
||||
initialReverse.state = AlignmentState.MATCH_MISMATCH;
|
||||
initialReverse.mismatches = 0;
|
||||
|
||||
alignments.add(initial);
|
||||
|
|
@ -112,32 +99,103 @@ public class BWAAligner implements Aligner {
|
|||
|
||||
// Done with this particular alignment.
|
||||
if(alignment.position == read.getReadLength()-1) {
|
||||
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()) + 1;
|
||||
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1;
|
||||
return Collections.<Alignment>singletonList(alignment);
|
||||
}
|
||||
|
||||
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position).value);
|
||||
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value);
|
||||
|
||||
// if z < D(i) then return {}
|
||||
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches;
|
||||
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches - alignment.gapOpens;
|
||||
if( mismatches < lowerBounds.get(alignment.position+1).value )
|
||||
continue;
|
||||
|
||||
if( alignment.mismatches > MAXIMUM_EDIT_DISTANCE )
|
||||
continue;
|
||||
|
||||
// For each base in { A, C, G, T }
|
||||
if( alignment.state == AlignmentState.MATCH_MISMATCH ) {
|
||||
if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) {
|
||||
// Add a potential insertion.
|
||||
BWAAlignment newAlignment = new BWAAlignment();
|
||||
newAlignment.negativeStrand = alignment.negativeStrand;
|
||||
newAlignment.position = alignment.position + 1;
|
||||
newAlignment.state = AlignmentState.INSERTION;
|
||||
newAlignment.gapOpens = alignment.gapOpens + 1;
|
||||
newAlignment.gapExtensions = alignment.gapExtensions;
|
||||
|
||||
newAlignment.loBound = alignment.loBound;
|
||||
newAlignment.hiBound = alignment.hiBound;
|
||||
|
||||
alignments.add(newAlignment);
|
||||
|
||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||
for(Base base: EnumSet.allOf(Base.class)) {
|
||||
newAlignment = new BWAAlignment();
|
||||
newAlignment.negativeStrand = alignment.negativeStrand;
|
||||
newAlignment.position = alignment.position;
|
||||
newAlignment.state = AlignmentState.DELETION;
|
||||
newAlignment.gapOpens = alignment.gapOpens + 1;
|
||||
newAlignment.gapExtensions = alignment.gapExtensions;
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
|
||||
if( newAlignment.loBound <= newAlignment.hiBound )
|
||||
alignments.add(newAlignment);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( alignment.state == AlignmentState.INSERTION ) {
|
||||
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
|
||||
// Add a potential insertion extension.
|
||||
BWAAlignment newAlignment = new BWAAlignment();
|
||||
newAlignment.negativeStrand = alignment.negativeStrand;
|
||||
newAlignment.position = alignment.position + 1;
|
||||
newAlignment.state = AlignmentState.INSERTION;
|
||||
newAlignment.gapOpens = alignment.gapOpens;
|
||||
newAlignment.gapExtensions = alignment.gapExtensions+1;
|
||||
|
||||
newAlignment.loBound = alignment.loBound;
|
||||
newAlignment.hiBound = alignment.hiBound;
|
||||
|
||||
if( newAlignment.loBound <= newAlignment.hiBound )
|
||||
alignments.add(newAlignment);
|
||||
}
|
||||
}
|
||||
else if( alignment.state == AlignmentState.DELETION ) {
|
||||
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
|
||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||
for(Base base: EnumSet.allOf(Base.class)) {
|
||||
BWAAlignment newAlignment = new BWAAlignment();
|
||||
newAlignment.negativeStrand = alignment.negativeStrand;
|
||||
newAlignment.position = alignment.position;
|
||||
newAlignment.state = AlignmentState.DELETION;
|
||||
newAlignment.gapOpens = alignment.gapOpens;
|
||||
newAlignment.gapExtensions = alignment.gapExtensions + 1;
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
|
||||
alignments.add(newAlignment);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mismatches
|
||||
for(Base base: EnumSet.allOf(Base.class)) {
|
||||
// Create and initialize a new alignment, given that base as the candidate.
|
||||
BWAAlignment newAlignment = new BWAAlignment();
|
||||
newAlignment.negativeStrand = alignment.negativeStrand;
|
||||
newAlignment.position = alignment.position + 1;
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
newAlignment.state = AlignmentState.MATCH_MISMATCH;
|
||||
newAlignment.mismatches = alignment.mismatches;
|
||||
if( base.toASCII() != bases[newAlignment.position] )
|
||||
newAlignment.mismatches++;
|
||||
newAlignment.gapOpens = alignment.gapOpens;
|
||||
newAlignment.gapExtensions = alignment.gapExtensions;
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
|
||||
// If this alignment is valid, add it to the list.
|
||||
if( newAlignment.loBound <= newAlignment.hiBound )
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.alignment.bwa.bwt.BWT;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* An alignment object to be used incrementally as the BWA aligner
|
||||
|
|
@ -12,8 +10,6 @@ import net.sf.samtools.SAMRecord;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class BWAAlignment implements Alignment {
|
||||
enum State { MATCH, INSERTION, DELETION }
|
||||
|
||||
/**
|
||||
* Start of the final alignment.
|
||||
*/
|
||||
|
|
@ -34,6 +30,16 @@ public class BWAAlignment implements Alignment {
|
|||
*/
|
||||
protected int mismatches;
|
||||
|
||||
/**
|
||||
* Number of gap opens in alignment.
|
||||
*/
|
||||
protected int gapOpens;
|
||||
|
||||
/**
|
||||
* Number of gap extensions in alignment.
|
||||
*/
|
||||
protected int gapExtensions;
|
||||
|
||||
/**
|
||||
* Working variable. The lower bound of the alignment within the BWT.
|
||||
*/
|
||||
|
|
@ -47,7 +53,7 @@ public class BWAAlignment implements Alignment {
|
|||
/**
|
||||
* Indicates the current state of an alignment. Are we in an insertion? Deletion?
|
||||
*/
|
||||
protected State alignmentState;
|
||||
protected AlignmentState state;
|
||||
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
|
|
@ -70,9 +76,13 @@ public class BWAAlignment implements Alignment {
|
|||
* @return BWA-style scores. 0 is best.
|
||||
*/
|
||||
public int getScore() {
|
||||
return mismatches;
|
||||
return mismatches + gapOpens + gapExtensions;
|
||||
}
|
||||
|
||||
public int getMismatches() { return mismatches; }
|
||||
public int getGapOpens() { return gapOpens; }
|
||||
public int getGapExtensions() { return gapExtensions; }
|
||||
|
||||
/**
|
||||
* Compare this alignment to another alignment.
|
||||
* @param other Other alignment to which to compare.
|
||||
|
|
@ -88,6 +98,6 @@ public class BWAAlignment implements Alignment {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("position = %d, mismatches = %d, loBound = %d, hiBound = %d, score = %d", position, mismatches, loBound, hiBound, getScore());
|
||||
return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d", position, state, mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue