Fixed a bug where not all alignments were returned if a read aligned to multiple locations. Enhanced the test suite to validate all alignments.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1762 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
05aa928e3e
commit
56bc4fa21a
|
|
@ -43,13 +43,14 @@ public class AlignerTestHarness {
|
||||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
int mismatches = 0;
|
int mismatches = 0;
|
||||||
|
int failures = 0;
|
||||||
|
|
||||||
for(SAMRecord read: reader) {
|
for(SAMRecord read: reader) {
|
||||||
count++;
|
count++;
|
||||||
if( count > 10000 ) break;
|
//if( count > 100000 ) break;
|
||||||
//if( count != 2 ) continue;
|
//if( count != 2 ) continue;
|
||||||
//if( !read.getReadName().endsWith("1507:1636#0") )
|
//if( !read.getReadName().endsWith("SL-XBC:1:90:15:1280#0") )
|
||||||
// continue;
|
// continue;
|
||||||
|
|
||||||
SAMRecord alignmentCleaned = null;
|
SAMRecord alignmentCleaned = null;
|
||||||
|
|
@ -72,48 +73,46 @@ public class AlignerTestHarness {
|
||||||
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
||||||
|
|
||||||
List<Alignment> alignments = aligner.align(alignmentCleaned);
|
List<Alignment> alignments = aligner.align(alignmentCleaned);
|
||||||
if(alignments.size() == 0 )
|
if(alignments.size() == 0 ) {
|
||||||
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||||
|
System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
|
||||||
Alignment alignment = alignments.get(0);
|
failures++;
|
||||||
|
|
||||||
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
|
||||||
|
|
||||||
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) {
|
|
||||||
System.out.println("Read has been aligned in wrong direction");
|
|
||||||
mismatches++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
Alignment foundAlignment = null;
|
||||||
|
for( Alignment alignment: alignments ) {
|
||||||
|
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
||||||
|
continue;
|
||||||
|
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
foundAlignment = alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( foundAlignment != null ) {
|
||||||
|
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
mismatches++;
|
||||||
|
//throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count));
|
||||||
|
|
||||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||||
int expectedMismatches = 0;
|
System.out.printf("read = %s, strand = %b%n", read.getReadString(), read.getReadNegativeStrandFlag());
|
||||||
for( int i = 0; i < read.getReadLength(); i++ ) {
|
System.out.printf("expected ref = %s%n", expectedRef);
|
||||||
if( read.getReadBases()[i] != expectedRef.charAt(i) )
|
for( Alignment alignment: alignments ) {
|
||||||
expectedMismatches++;
|
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||||
}
|
System.out.printf("actual ref = %s, strand = %b%n", alignedRef, read.getReadNegativeStrandFlag());
|
||||||
|
//System.out.printf("(reversed) = %s, strand = %b%n", BaseUtils.simpleReverseComplement(alignedRef), !read.getReadNegativeStrandFlag());
|
||||||
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases());
|
|
||||||
int actualMismatches = 0;
|
|
||||||
for( int i = 0; i < read.getReadLength(); i++ ) {
|
|
||||||
if( read.getReadBases()[i] != alignedRef.charAt(i) )
|
|
||||||
actualMismatches++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( expectedMismatches != actualMismatches ) {
|
|
||||||
System.out.printf("read = %s%n", read.getReadString());
|
|
||||||
System.out.printf("expected ref = %s%n", expectedRef);
|
|
||||||
System.out.printf("actual ref = %s%n", alignedRef);
|
|
||||||
mismatches++;
|
|
||||||
//throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if( count % 1000 == 0 )
|
if( count % 1000 == 0 )
|
||||||
System.out.printf("%d reads examined.%n",count);
|
System.out.printf("%d reads examined.%n",count);
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.printf("%d reads examined; %d mismatches.%n",count,mismatches);
|
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -120,17 +120,21 @@ public class BWAAligner implements Aligner {
|
||||||
|
|
||||||
// Found a valid alignment; store it and move on.
|
// Found a valid alignment; store it and move on.
|
||||||
if(alignment.position == read.getReadLength()-1) {
|
if(alignment.position == read.getReadLength()-1) {
|
||||||
if( !alignment.isNegativeStrand() ) {
|
for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) {
|
||||||
int sizeAlongReference = alignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+alignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
BWAAlignment finalAlignment = alignment.clone();
|
||||||
alignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(alignment.loBound) - sizeAlongReference + 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1;
|
|
||||||
successfulMatches.add(alignment);
|
|
||||||
|
|
||||||
bestScore = Math.min(alignment.getScore(),bestScore);
|
if( finalAlignment.isNegativeStrand() )
|
||||||
bestDiff = Math.min(alignment.mismatches+alignment.gapOpens+alignment.gapExtensions,bestDiff);
|
finalAlignment.alignmentStart = forwardSuffixArray.get(bwtIndex) + 1;
|
||||||
maxDiff = bestDiff + 1;
|
else {
|
||||||
|
int sizeAlongReference = finalAlignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||||
|
finalAlignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1;
|
||||||
|
}
|
||||||
|
successfulMatches.add(finalAlignment);
|
||||||
|
|
||||||
|
bestScore = Math.min(finalAlignment.getScore(),bestScore);
|
||||||
|
bestDiff = Math.min(finalAlignment.mismatches+finalAlignment.gapOpens+finalAlignment.gapExtensions,bestDiff);
|
||||||
|
maxDiff = bestDiff + 1;
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,6 @@ public class BWAAlignment implements Alignment, Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
|
return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue