From 56bc4fa21aeeb1f367e499b0a796191769c93c03 Mon Sep 17 00:00:00 2001 From: hanna Date: Sun, 4 Oct 2009 18:20:20 +0000 Subject: [PATCH] Fixed bug where not all alignments were returned if read aligned to multiple locations. Enhanced test suite to validate all alignments. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1762 348d0f76-0448-11de-a6fe-93d51630548a --- .../alignment/bwa/AlignerTestHarness.java | 67 +++++++++---------- .../sting/alignment/bwa/BWAAligner.java | 24 ++++--- .../sting/alignment/bwa/BWAAlignment.java | 2 +- 3 files changed, 48 insertions(+), 45 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index 1da78cb2d..69638d2de 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -43,13 +43,14 @@ public class AlignerTestHarness { SAMFileReader reader = new SAMFileReader(bamFile); reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); - int mismatches = 0; + int mismatches = 0; + int failures = 0; for(SAMRecord read: reader) { count++; - if( count > 10000 ) break; + //if( count > 100000 ) break; //if( count != 2 ) continue; - //if( !read.getReadName().endsWith("1507:1636#0") ) + //if( !read.getReadName().endsWith("SL-XBC:1:90:15:1280#0") ) // continue; SAMRecord alignmentCleaned = null; @@ -72,48 +73,46 @@ public class AlignerTestHarness { alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); List alignments = aligner.align(alignmentCleaned); - if(alignments.size() == 0 ) - throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); - - Alignment alignment = alignments.get(0); - - System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions()); - - if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) { - System.out.println("Read has been aligned in wrong direction"); - mismatches++; + if(alignments.size() == 0 ) { + //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); + System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count); + failures++; } - if( read.getAlignmentStart() != alignment.getAlignmentStart() ) { + Alignment foundAlignment = null; + for( Alignment alignment: alignments ) { + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + continue; + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) + continue; + + foundAlignment = alignment; + } + + if( foundAlignment != null ) { + //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions()); + } + else { + mismatches++; + //throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count)); + IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases()); - int expectedMismatches = 0; - for( int i = 0; i < read.getReadLength(); i++ ) { - if( read.getReadBases()[i] != expectedRef.charAt(i) ) - expectedMismatches++; - } - - String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases()); - int actualMismatches = 0; - for( int i = 0; i < read.getReadLength(); i++ ) { - if( read.getReadBases()[i] != alignedRef.charAt(i) ) - actualMismatches++; - } - - if( expectedMismatches != actualMismatches ) { - System.out.printf("read = %s%n", read.getReadString()); - System.out.printf("expected ref = %s%n", expectedRef); - System.out.printf("actual ref = %s%n", alignedRef); - mismatches++; - //throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count)); + System.out.printf("read = %s, strand = %b%n", read.getReadString(), read.getReadNegativeStrandFlag()); + System.out.printf("expected ref = %s%n", expectedRef); + for( Alignment alignment: alignments ) { + String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases()); + System.out.printf("actual ref = %s, strand = %b%n", alignedRef, read.getReadNegativeStrandFlag()); + //System.out.printf("(reversed) = %s, strand = %b%n", BaseUtils.simpleReverseComplement(alignedRef), !read.getReadNegativeStrandFlag()); } } + if( count % 1000 == 0 ) System.out.printf("%d reads examined.%n",count); } - System.out.printf("%d reads examined; %d mismatches.%n",count,mismatches); + System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures); } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 72d87e97d..561af983a 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -120,17 +120,21 @@ public class BWAAligner implements Aligner { // Found a valid alignment; store it and move on. if(alignment.position == read.getReadLength()-1) { - if( !alignment.isNegativeStrand() ) { - int sizeAlongReference = alignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+alignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); - alignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(alignment.loBound) - sizeAlongReference + 1; - } - else - alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1; - successfulMatches.add(alignment); + for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) { + BWAAlignment finalAlignment = alignment.clone(); - bestScore = Math.min(alignment.getScore(),bestScore); - bestDiff = Math.min(alignment.mismatches+alignment.gapOpens+alignment.gapExtensions,bestDiff); - maxDiff = bestDiff + 1; + if( finalAlignment.isNegativeStrand() ) + finalAlignment.alignmentStart = forwardSuffixArray.get(bwtIndex) + 1; + else { + int sizeAlongReference = finalAlignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); + finalAlignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1; + } + successfulMatches.add(finalAlignment); + + bestScore = Math.min(finalAlignment.getScore(),bestScore); + bestDiff = Math.min(finalAlignment.mismatches+finalAlignment.gapOpens+finalAlignment.gapExtensions,bestDiff); + maxDiff = bestDiff + 1; + } continue; } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java index afd6b85b6..46b97cbc5 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java @@ -168,6 +168,6 @@ public class BWAAlignment implements Alignment, Cloneable { } public String toString() { - return String.format("position: %d, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber); + return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber); } }