2009-09-23 03:05:10 +08:00
|
|
|
package org.broadinstitute.sting.alignment.bwa;
|
|
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
import org.broadinstitute.sting.alignment.bwa.bwt.*;
|
2009-09-23 03:05:10 +08:00
|
|
|
import org.broadinstitute.sting.alignment.Alignment;
|
|
|
|
|
import org.broadinstitute.sting.alignment.Aligner;
|
|
|
|
|
import org.broadinstitute.sting.utils.BaseUtils;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.PriorityQueue;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.EnumSet;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create imperfect alignments from the read to the genome represented by the given BWT / suffix array.
|
|
|
|
|
*
|
|
|
|
|
* @author mhanna
|
|
|
|
|
* @version 0.1
|
|
|
|
|
*/
|
|
|
|
|
public class BWAAligner implements Aligner {
|
|
|
|
|
/**
|
|
|
|
|
* BWT in the forward direction.
|
|
|
|
|
*/
|
|
|
|
|
private BWT forwardBWT;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* BWT in the reverse direction.
|
|
|
|
|
*/
|
|
|
|
|
private BWT reverseBWT;
|
|
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
/**
|
|
|
|
|
* Suffix array in the forward direction.
|
|
|
|
|
*/
|
|
|
|
|
private SuffixArray reverseSuffixArray;
|
|
|
|
|
|
2009-09-23 03:05:10 +08:00
|
|
|
/**
|
|
|
|
|
* Maximum edit distance (-n option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int MAXIMUM_EDIT_DISTANCE = 4;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Maximum number of gap opens (-o option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int MAXIMUM_GAP_OPENS = 1;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Maximum number of gap extensions (-e option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int MAXIMUM_GAP_EXTENSIONS = -1;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Penalty for straight mismatches (-M option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int MISMATCH_PENALTY = 3;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Penalty for gap opens (-O option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int GAP_OPEN_PENALTY = 11;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Penalty for gap extensions (-E option from original BWA).
|
|
|
|
|
*/
|
|
|
|
|
private static final int GAP_EXTENSION_PENALTY = 4;
|
|
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) {
|
2009-09-23 03:05:10 +08:00
|
|
|
forwardBWT = new BWTReader(forwardBWTFile).read();
|
|
|
|
|
reverseBWT = new BWTReader(reverseBWTFile).read();
|
2009-09-24 07:44:59 +08:00
|
|
|
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile).read();
|
2009-09-23 03:05:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public List<Alignment> align( SAMRecord read ) {
|
2009-09-24 07:44:59 +08:00
|
|
|
byte[] forwardBases = read.getReadBases();
|
|
|
|
|
byte[] reverseBases = BaseUtils.simpleReverseComplement(forwardBases);
|
2009-09-23 03:05:10 +08:00
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
List<LowerBound> forwardLowerBounds = LowerBound.create(forwardBases,forwardBWT);
|
|
|
|
|
//for( int i = 0; i < forwardLowerBounds.size(); i++ )
|
|
|
|
|
// System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i));
|
|
|
|
|
List<LowerBound> reverseLowerBounds = LowerBound.create(reverseBases,reverseBWT);
|
|
|
|
|
//for( int i = 0; i < reverseLowerBounds.size(); i++ )
|
|
|
|
|
// System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
2009-09-23 03:05:10 +08:00
|
|
|
|
|
|
|
|
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
|
|
|
|
|
|
|
|
|
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
|
|
|
|
// set as the entire BWT.
|
|
|
|
|
BWAAlignment initial = new BWAAlignment();
|
2009-09-24 07:44:59 +08:00
|
|
|
initial.negativeStrand = true;
|
|
|
|
|
initial.position = 0;
|
2009-09-23 03:05:10 +08:00
|
|
|
initial.loBound = 0;
|
|
|
|
|
initial.hiBound = forwardBWT.length();
|
|
|
|
|
initial.mismatches = 0;
|
|
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
BWAAlignment initialReverse = new BWAAlignment();
|
|
|
|
|
initialReverse.negativeStrand = false;
|
|
|
|
|
initialReverse.position = 0;
|
|
|
|
|
initialReverse.loBound = 0;
|
|
|
|
|
initialReverse.hiBound = reverseBWT.length();
|
|
|
|
|
initialReverse.mismatches = 0;
|
|
|
|
|
|
2009-09-23 03:05:10 +08:00
|
|
|
alignments.add(initial);
|
2009-09-24 07:44:59 +08:00
|
|
|
//alignments.add(initialReverse);
|
2009-09-23 03:05:10 +08:00
|
|
|
|
|
|
|
|
while(!alignments.isEmpty()) {
|
|
|
|
|
BWAAlignment alignment = alignments.remove();
|
|
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
byte[] bases = alignment.negativeStrand ? forwardBases : reverseBases;
|
|
|
|
|
BWT bwt = alignment.negativeStrand ? reverseBWT : forwardBWT;
|
|
|
|
|
List<LowerBound> lowerBounds = alignment.negativeStrand ? forwardLowerBounds : reverseLowerBounds;
|
|
|
|
|
|
2009-09-23 03:05:10 +08:00
|
|
|
// Done with this particular alignment.
|
2009-09-24 07:44:59 +08:00
|
|
|
if(alignment.position == read.getReadLength()-1) {
|
|
|
|
|
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()) + 1;
|
2009-09-23 03:05:10 +08:00
|
|
|
return Collections.<Alignment>singletonList(alignment);
|
2009-09-24 07:44:59 +08:00
|
|
|
}
|
2009-09-23 03:05:10 +08:00
|
|
|
|
2009-09-24 07:44:59 +08:00
|
|
|
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position).value);
|
2009-09-23 03:05:10 +08:00
|
|
|
|
|
|
|
|
// if z < D(i) then return {}
|
2009-09-24 07:44:59 +08:00
|
|
|
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches;
|
|
|
|
|
if( mismatches < lowerBounds.get(alignment.position+1).value )
|
2009-09-23 03:05:10 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if( alignment.mismatches > MAXIMUM_EDIT_DISTANCE )
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// For each base in { A, C, G, T }
|
|
|
|
|
for(Base base: EnumSet.allOf(Base.class)) {
|
|
|
|
|
// Create and initialize a new alignment, given that base as the candidate.
|
|
|
|
|
BWAAlignment newAlignment = new BWAAlignment();
|
2009-09-24 07:44:59 +08:00
|
|
|
newAlignment.negativeStrand = alignment.negativeStrand;
|
|
|
|
|
newAlignment.position = alignment.position + 1;
|
|
|
|
|
|
|
|
|
|
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
|
|
|
|
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
2009-09-23 03:05:10 +08:00
|
|
|
newAlignment.mismatches = alignment.mismatches;
|
2009-09-24 07:44:59 +08:00
|
|
|
if( base.toASCII() != bases[newAlignment.position] )
|
2009-09-23 03:05:10 +08:00
|
|
|
newAlignment.mismatches++;
|
|
|
|
|
|
|
|
|
|
// If this alignment is valid, add it to the list.
|
|
|
|
|
if( newAlignment.loBound <= newAlignment.hiBound )
|
|
|
|
|
alignments.add(newAlignment);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Collections.emptyList();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|