Fix off-by-number-of-deletions issue with negative strand reads. Improved performance by factor of 2.5x.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1761 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7605ee500c
commit
05aa928e3e
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa;
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
import org.broadinstitute.sting.alignment.bwa.bwt.*;
|
|
||||||
import org.broadinstitute.sting.alignment.Aligner;
|
import org.broadinstitute.sting.alignment.Aligner;
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
@ -44,10 +43,12 @@ public class AlignerTestHarness {
|
||||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
|
int mismatches = 0;
|
||||||
|
|
||||||
for(SAMRecord read: reader) {
|
for(SAMRecord read: reader) {
|
||||||
count++;
|
count++;
|
||||||
//if( count > 25000 ) break;
|
if( count > 10000 ) break;
|
||||||
//if( count != 39 ) continue;
|
//if( count != 2 ) continue;
|
||||||
//if( !read.getReadName().endsWith("1507:1636#0") )
|
//if( !read.getReadName().endsWith("1507:1636#0") )
|
||||||
// continue;
|
// continue;
|
||||||
|
|
||||||
|
|
@ -78,8 +79,10 @@ public class AlignerTestHarness {
|
||||||
|
|
||||||
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
||||||
|
|
||||||
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) {
|
||||||
throw new StingException("Read has been aligned in wrong direction");
|
System.out.println("Read has been aligned in wrong direction");
|
||||||
|
mismatches++;
|
||||||
|
}
|
||||||
|
|
||||||
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
||||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
|
|
@ -101,7 +104,8 @@ public class AlignerTestHarness {
|
||||||
System.out.printf("read = %s%n", read.getReadString());
|
System.out.printf("read = %s%n", read.getReadString());
|
||||||
System.out.printf("expected ref = %s%n", expectedRef);
|
System.out.printf("expected ref = %s%n", expectedRef);
|
||||||
System.out.printf("actual ref = %s%n", alignedRef);
|
System.out.printf("actual ref = %s%n", alignedRef);
|
||||||
throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart()));
|
mismatches++;
|
||||||
|
//throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -109,7 +113,7 @@ public class AlignerTestHarness {
|
||||||
System.out.printf("%d reads examined.%n",count);
|
System.out.printf("%d reads examined.%n",count);
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.printf("%d reads examined.%n",count);
|
System.out.printf("%d reads examined; %d mismatches.%n",count,mismatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,129 @@
|
||||||
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.util.Deque;
|
||||||
|
import java.util.ArrayDeque;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a sequence of matches.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class AlignmentMatchSequence implements Cloneable {
|
||||||
|
/**
|
||||||
|
* Stores the particular match entries in the order they occur.
|
||||||
|
*/
|
||||||
|
private Deque<AlignmentMatchSequenceEntry> entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone the given match sequence.
|
||||||
|
* @return A deep copy of the current match sequence.
|
||||||
|
*/
|
||||||
|
public AlignmentMatchSequence clone() {
|
||||||
|
AlignmentMatchSequence copy = null;
|
||||||
|
try {
|
||||||
|
copy = (AlignmentMatchSequence)super.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new StingException("Unable to clone AlignmentMatchSequence.");
|
||||||
|
}
|
||||||
|
|
||||||
|
copy.entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||||
|
for( AlignmentMatchSequenceEntry entry: entries )
|
||||||
|
copy.entries.add(entry.clone());
|
||||||
|
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All a new alignment of the given state.
|
||||||
|
* @param state State to add to the sequence.
|
||||||
|
*/
|
||||||
|
public void addNext( AlignmentState state ) {
|
||||||
|
AlignmentMatchSequenceEntry last = entries.peekLast();
|
||||||
|
// If the last entry is the same as this one, increment it. Otherwise, add a new entry.
|
||||||
|
if( last != null && last.alignmentState == state )
|
||||||
|
last.increment();
|
||||||
|
else
|
||||||
|
entries.add(new AlignmentMatchSequenceEntry(state));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of this alignment (what's the state of the last base?)
|
||||||
|
* @return State of the most recently aligned base.
|
||||||
|
*/
|
||||||
|
public AlignmentState getCurrentState() {
|
||||||
|
if( entries.size() == 0 )
|
||||||
|
return AlignmentState.MATCH_MISMATCH;
|
||||||
|
return entries.peekLast().getAlignmentState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases in the read match the given state.
|
||||||
|
* @param state State to test.
|
||||||
|
* @return number of bases which match that state.
|
||||||
|
*/
|
||||||
|
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||||
|
int matches = 0;
|
||||||
|
for( AlignmentMatchSequenceEntry entry: entries ) {
|
||||||
|
if( entry.getAlignmentState() == state )
|
||||||
|
matches += entry.count;
|
||||||
|
}
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores an individual match sequence entry.
|
||||||
|
*/
|
||||||
|
private class AlignmentMatchSequenceEntry implements Cloneable {
|
||||||
|
/**
|
||||||
|
* The state of the alignment throughout a given point in the sequence.
|
||||||
|
*/
|
||||||
|
private final AlignmentState alignmentState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of bases having this particular state.
|
||||||
|
*/
|
||||||
|
private int count;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new sequence entry with the given state.
|
||||||
|
* @param alignmentState The state that this sequence should contain.
|
||||||
|
*/
|
||||||
|
AlignmentMatchSequenceEntry( AlignmentState alignmentState ) {
|
||||||
|
this.alignmentState = alignmentState;
|
||||||
|
this.count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clone the given match sequence entry.
|
||||||
|
* @return A deep copy of the current match sequence entry.
|
||||||
|
*/
|
||||||
|
public AlignmentMatchSequenceEntry clone() {
|
||||||
|
try {
|
||||||
|
return (AlignmentMatchSequenceEntry)super.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new StingException("Unable to clone AlignmentMatchSequenceEntry.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the current state of the alignment.
|
||||||
|
* @return The state of the current sequence.
|
||||||
|
*/
|
||||||
|
AlignmentState getAlignmentState() {
|
||||||
|
return alignmentState;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the count of alignments having this particular state.
|
||||||
|
*/
|
||||||
|
void increment() {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa;
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The current state of an alignment.
|
* The state of a given base in the alignment.
|
||||||
*
|
*
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,11 @@ public class BWAAligner implements Aligner {
|
||||||
*/
|
*/
|
||||||
public final int GAP_EXTENSION_PENALTY = 4;
|
public final int GAP_EXTENSION_PENALTY = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skip the ends of indels.
|
||||||
|
*/
|
||||||
|
public final int INDEL_END_SKIP = 5;
|
||||||
|
|
||||||
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||||
reverseBWT = new BWTReader(reverseBWTFile).read();
|
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||||
|
|
@ -75,6 +80,8 @@ public class BWAAligner implements Aligner {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Alignment> align( SAMRecord read ) {
|
public List<Alignment> align( SAMRecord read ) {
|
||||||
|
List<Alignment> successfulMatches = new ArrayList<Alignment>();
|
||||||
|
|
||||||
byte[] uncomplementedBases = read.getReadBases();
|
byte[] uncomplementedBases = read.getReadBases();
|
||||||
byte[] complementedBases = BaseUtils.reverse(BaseUtils.simpleReverseComplement(uncomplementedBases));
|
byte[] complementedBases = BaseUtils.reverse(BaseUtils.simpleReverseComplement(uncomplementedBases));
|
||||||
|
|
||||||
|
|
@ -88,77 +95,100 @@ public class BWAAligner implements Aligner {
|
||||||
System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Seed the best score with any score that won't overflow on comparison.
|
||||||
|
int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
|
||||||
|
int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
|
||||||
|
int maxDiff = MAXIMUM_EDIT_DISTANCE;
|
||||||
|
|
||||||
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
||||||
|
|
||||||
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
||||||
// set as the entire BWT.
|
// set as the entire BWT.
|
||||||
alignments.add(createSeedAlignment(forwardBWT));
|
|
||||||
alignments.add(createSeedAlignment(reverseBWT));
|
alignments.add(createSeedAlignment(reverseBWT));
|
||||||
|
alignments.add(createSeedAlignment(forwardBWT));
|
||||||
|
|
||||||
while(!alignments.isEmpty()) {
|
while(!alignments.isEmpty()) {
|
||||||
BWAAlignment alignment = alignments.remove();
|
BWAAlignment alignment = alignments.remove();
|
||||||
|
|
||||||
|
// From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on.
|
||||||
|
if( alignment.getScore() > bestScore + MISMATCH_PENALTY )
|
||||||
|
break;
|
||||||
|
|
||||||
byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases;
|
byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases;
|
||||||
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
||||||
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
||||||
|
|
||||||
// Done with this particular alignment.
|
// Found a valid alignment; store it and move on.
|
||||||
if(alignment.position == read.getReadLength()-1) {
|
if(alignment.position == read.getReadLength()-1) {
|
||||||
if( !alignment.isNegativeStrand() )
|
if( !alignment.isNegativeStrand() ) {
|
||||||
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1;
|
int sizeAlongReference = alignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+alignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||||
|
alignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(alignment.loBound) - sizeAlongReference + 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1;
|
alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1;
|
||||||
return Collections.<Alignment>singletonList(alignment);
|
successfulMatches.add(alignment);
|
||||||
|
|
||||||
|
bestScore = Math.min(alignment.getScore(),bestScore);
|
||||||
|
bestDiff = Math.min(alignment.mismatches+alignment.gapOpens+alignment.gapExtensions,bestDiff);
|
||||||
|
maxDiff = bestDiff + 1;
|
||||||
|
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value);
|
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position] : "");
|
||||||
|
|
||||||
// if z < D(i) then return {}
|
// if z < D(i) then return {}
|
||||||
int mismatches = MAXIMUM_EDIT_DISTANCE - alignment.mismatches - alignment.gapOpens;
|
int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions;
|
||||||
if( mismatches < lowerBounds.get(alignment.position+1).value )
|
if( mismatches < lowerBounds.get(alignment.position+1).value )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if( alignment.mismatches > MAXIMUM_EDIT_DISTANCE )
|
if( mismatches > 0 &&
|
||||||
continue;
|
alignment.position+1 >= INDEL_END_SKIP-1+alignment.gapOpens+alignment.gapExtensions &&
|
||||||
|
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.gapOpens+alignment.gapExtensions ) {
|
||||||
|
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
|
||||||
|
if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) {
|
||||||
|
// Add a potential insertion extension.
|
||||||
|
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||||
|
insertionAlignment.gapOpens++;
|
||||||
|
alignments.add(insertionAlignment);
|
||||||
|
|
||||||
if( alignment.state == AlignmentState.MATCH_MISMATCH ) {
|
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||||
if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) {
|
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||||
// Add a potential insertion.
|
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||||
// Add a potential insertion extension.
|
deletionAlignment.gapOpens++;
|
||||||
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
alignments.addAll(deletionAlignments);
|
||||||
insertionAlignment.gapOpens++;
|
}
|
||||||
alignments.add(insertionAlignment);
|
|
||||||
|
|
||||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
|
||||||
List<BWAAlignment> newAlignments = createDeletionAlignments(bwt,alignment);
|
|
||||||
for( BWAAlignment deletionAlignment: newAlignments )
|
|
||||||
deletionAlignment.gapOpens++;
|
|
||||||
alignments.addAll(newAlignments);
|
|
||||||
}
|
}
|
||||||
}
|
else if( alignment.getCurrentState() == AlignmentState.INSERTION ) {
|
||||||
else if( alignment.state == AlignmentState.INSERTION ) {
|
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||||
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
|
// Add a potential insertion extension.
|
||||||
// Add a potential insertion extension.
|
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||||
BWAAlignment newAlignment = createInsertionAlignment(alignment);
|
insertionAlignment.gapExtensions++;
|
||||||
newAlignment.gapExtensions++;
|
alignments.add(insertionAlignment);
|
||||||
alignments.add(newAlignment);
|
}
|
||||||
}
|
}
|
||||||
}
|
else if( alignment.getCurrentState() == AlignmentState.DELETION ) {
|
||||||
else if( alignment.state == AlignmentState.DELETION ) {
|
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||||
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS ) {
|
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||||
List<BWAAlignment> newAlignments = createDeletionAlignments(bwt,alignment);
|
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||||
for( BWAAlignment newAlignment: newAlignments )
|
deletionAlignment.gapExtensions++;
|
||||||
newAlignment.gapExtensions++;
|
alignments.addAll(deletionAlignments);
|
||||||
alignments.addAll(newAlignments);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mismatches
|
// Mismatches
|
||||||
alignments.addAll(createMatchedAlignments(bwt,alignment,bases));
|
// Temporary -- look ahead to see if the next alignment is bounded.
|
||||||
|
boolean allowMismatches = mismatches > 0;
|
||||||
|
if( alignment.position+1 < read.getReadLength()-1 )
|
||||||
|
allowMismatches &=
|
||||||
|
!(lowerBounds.get(alignment.position+2).value == mismatches-1 && lowerBounds.get(alignment.position+1).value == mismatches-1 &&
|
||||||
|
lowerBounds.get(alignment.position+2).width == lowerBounds.get(alignment.position+1).width);
|
||||||
|
alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowMismatches));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Collections.emptyList();
|
return successfulMatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -169,10 +199,9 @@ public class BWAAligner implements Aligner {
|
||||||
private BWAAlignment createSeedAlignment(BWT bwt) {
|
private BWAAlignment createSeedAlignment(BWT bwt) {
|
||||||
BWAAlignment seed = new BWAAlignment(this);
|
BWAAlignment seed = new BWAAlignment(this);
|
||||||
seed.negativeStrand = (bwt == forwardBWT);
|
seed.negativeStrand = (bwt == forwardBWT);
|
||||||
seed.position = 0;
|
seed.position = -1;
|
||||||
seed.loBound = 0;
|
seed.loBound = 0;
|
||||||
seed.hiBound = bwt.length();
|
seed.hiBound = bwt.length();
|
||||||
seed.state = AlignmentState.MATCH_MISMATCH;
|
|
||||||
seed.mismatches = 0;
|
seed.mismatches = 0;
|
||||||
return seed;
|
return seed;
|
||||||
}
|
}
|
||||||
|
|
@ -182,11 +211,31 @@ public class BWAAligner implements Aligner {
|
||||||
* @param bwt Source BWT with which to work.
|
* @param bwt Source BWT with which to work.
|
||||||
* @param alignment Alignment for the previous position.
|
* @param alignment Alignment for the previous position.
|
||||||
* @param bases The bases in the read.
|
* @param bases The bases in the read.
|
||||||
|
* @param allowMismatch Should mismatching bases be allowed?
|
||||||
* @return New alignment representing this position if valid; null otherwise.
|
* @return New alignment representing this position if valid; null otherwise.
|
||||||
*/
|
*/
|
||||||
private List<BWAAlignment> createMatchedAlignments( BWT bwt, BWAAlignment alignment, byte[] bases ) {
|
private List<BWAAlignment> createMatchedAlignments( BWT bwt, BWAAlignment alignment, byte[] bases, boolean allowMismatch ) {
|
||||||
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
||||||
for(Base base: EnumSet.allOf(Base.class)) {
|
|
||||||
|
List<Base> baseChoices = new ArrayList<Base>();
|
||||||
|
Base thisBase = Base.fromASCII(bases[alignment.position+1]);
|
||||||
|
|
||||||
|
if( allowMismatch )
|
||||||
|
baseChoices.addAll(EnumSet.allOf(Base.class));
|
||||||
|
else
|
||||||
|
baseChoices.add(thisBase);
|
||||||
|
|
||||||
|
if( thisBase != null ) {
|
||||||
|
// Keep rotating the current base to the last position until we've hit the current base.
|
||||||
|
for( ;; ) {
|
||||||
|
baseChoices.add(baseChoices.remove(0));
|
||||||
|
if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) )
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(Base base: baseChoices) {
|
||||||
BWAAlignment newAlignment = alignment.clone();
|
BWAAlignment newAlignment = alignment.clone();
|
||||||
|
|
||||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||||
|
|
@ -197,7 +246,7 @@ public class BWAAligner implements Aligner {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
newAlignment.position++;
|
newAlignment.position++;
|
||||||
newAlignment.state = AlignmentState.MATCH_MISMATCH;
|
newAlignment.addState(AlignmentState.MATCH_MISMATCH);
|
||||||
if( base.toASCII() != bases[newAlignment.position] )
|
if( base.toASCII() != bases[newAlignment.position] )
|
||||||
newAlignment.mismatches++;
|
newAlignment.mismatches++;
|
||||||
|
|
||||||
|
|
@ -216,12 +265,12 @@ public class BWAAligner implements Aligner {
|
||||||
// Add a potential insertion extension.
|
// Add a potential insertion extension.
|
||||||
BWAAlignment newAlignment = alignment.clone();
|
BWAAlignment newAlignment = alignment.clone();
|
||||||
newAlignment.position++;
|
newAlignment.position++;
|
||||||
newAlignment.state = AlignmentState.INSERTION;
|
newAlignment.addState(AlignmentState.INSERTION);
|
||||||
return newAlignment;
|
return newAlignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create new alignments representing a deletion a this point in the read.
|
* Create new alignments representing a deletion at this point in the read.
|
||||||
* @param bwt source BWT for inferring deletion info.
|
* @param bwt source BWT for inferring deletion info.
|
||||||
* @param alignment Alignment from which to derive the deletion.
|
* @param alignment Alignment from which to derive the deletion.
|
||||||
* @return New alignments reflecting all possible deletions.
|
* @return New alignments reflecting all possible deletions.
|
||||||
|
|
@ -238,7 +287,7 @@ public class BWAAligner implements Aligner {
|
||||||
if( newAlignment.loBound > newAlignment.hiBound )
|
if( newAlignment.loBound > newAlignment.hiBound )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
newAlignment.state = AlignmentState.DELETION;
|
newAlignment.addState(AlignmentState.DELETION);
|
||||||
|
|
||||||
newAlignments.add(newAlignment);
|
newAlignments.add(newAlignment);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,16 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class BWAAlignment implements Alignment, Cloneable {
|
public class BWAAlignment implements Alignment, Cloneable {
|
||||||
|
/**
|
||||||
|
* Track the number of alignments that have been created.
|
||||||
|
*/
|
||||||
|
private static long numCreated;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Which number alignment is this?
|
||||||
|
*/
|
||||||
|
private long creationNumber;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The aligner performing the alignments.
|
* The aligner performing the alignments.
|
||||||
*/
|
*/
|
||||||
|
|
@ -22,10 +32,15 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
protected int alignmentStart;
|
protected int alignmentStart;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Working variable. Is this match being treated as a negative or positive strand?
|
* Is this match being treated as a negative or positive strand?
|
||||||
*/
|
*/
|
||||||
protected boolean negativeStrand;
|
protected boolean negativeStrand;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The sequence of matches/mismatches/insertions/deletions.
|
||||||
|
*/
|
||||||
|
private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Working variable. How many bases have been matched at this point.
|
* Working variable. How many bases have been matched at this point.
|
||||||
*/
|
*/
|
||||||
|
|
@ -56,11 +71,6 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
*/
|
*/
|
||||||
protected int hiBound;
|
protected int hiBound;
|
||||||
|
|
||||||
/**
|
|
||||||
* Indicates the current state of an alignment. Are we in an insertion? Deletion?
|
|
||||||
*/
|
|
||||||
protected AlignmentState state;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the starting position for the given alignment.
|
* Gets the starting position for the given alignment.
|
||||||
* @return Starting position.
|
* @return Starting position.
|
||||||
|
|
@ -77,6 +87,22 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
return negativeStrand;
|
return negativeStrand;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of this alignment (state of the last base viewed)..
|
||||||
|
* @return Current state of the alignment.
|
||||||
|
*/
|
||||||
|
public AlignmentState getCurrentState() {
|
||||||
|
return alignmentMatchSequence.getCurrentState();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the given state to the current alignment.
|
||||||
|
* @param state State to add to the given alignment.
|
||||||
|
*/
|
||||||
|
public void addState( AlignmentState state ) {
|
||||||
|
alignmentMatchSequence.addNext(state);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the BWA score of this alignment.
|
* Gets the BWA score of this alignment.
|
||||||
* @return BWA-style scores. 0 is best.
|
* @return BWA-style scores. 0 is best.
|
||||||
|
|
@ -95,6 +121,7 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
*/
|
*/
|
||||||
public BWAAlignment( BWAAligner aligner ) {
|
public BWAAlignment( BWAAligner aligner ) {
|
||||||
this.aligner = aligner;
|
this.aligner = aligner;
|
||||||
|
this.creationNumber = numCreated++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -102,29 +129,45 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
* @return New instance of the alignment.
|
* @return New instance of the alignment.
|
||||||
*/
|
*/
|
||||||
public BWAAlignment clone() {
|
public BWAAlignment clone() {
|
||||||
|
BWAAlignment newAlignment = null;
|
||||||
try {
|
try {
|
||||||
return (BWAAlignment)super.clone();
|
newAlignment = (BWAAlignment)super.clone();
|
||||||
}
|
}
|
||||||
catch( CloneNotSupportedException ex ) {
|
catch( CloneNotSupportedException ex ) {
|
||||||
throw new StingException("Unable to clone BWAAlignment.");
|
throw new StingException("Unable to clone BWAAlignment.");
|
||||||
}
|
}
|
||||||
|
newAlignment.creationNumber = numCreated++;
|
||||||
|
newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone();
|
||||||
|
|
||||||
|
return newAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases in the read match the given state.
|
||||||
|
* @param state State to test.
|
||||||
|
* @return number of bases which match that state.
|
||||||
|
*/
|
||||||
|
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||||
|
return alignmentMatchSequence.getNumberOfBasesMatchingState(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare this alignment to another alignment.
|
* Compare this alignment to another alignment.
|
||||||
* @param other Other alignment to which to compare.
|
* @param rhs Other alignment to which to compare.
|
||||||
* @return < 0 if this < other, == 0 if this == other, > 0 if this > other
|
* @return < 0 if this < other, == 0 if this == other, > 0 if this > other
|
||||||
*/
|
*/
|
||||||
public int compareTo(Alignment other) {
|
public int compareTo(Alignment rhs) {
|
||||||
// If the scores are equal, use the position to disambiguate order.
|
BWAAlignment other = (BWAAlignment)rhs;
|
||||||
|
|
||||||
|
// If the scores are equal, use the score to disambiguate.
|
||||||
int scoreComparison = Integer.valueOf(getScore()).compareTo(other.getScore());
|
int scoreComparison = Integer.valueOf(getScore()).compareTo(other.getScore());
|
||||||
if( scoreComparison != 0 )
|
if( scoreComparison != 0 )
|
||||||
return scoreComparison;
|
return scoreComparison;
|
||||||
else
|
|
||||||
return -Integer.valueOf(position).compareTo(((BWAAlignment)other).position);
|
return -Long.valueOf(this.creationNumber).compareTo(other.creationNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d", position, state, mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore());
|
return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue