diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index 43aad684e..ab93e21e6 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.alignment.bwa.bwt.*; import org.broadinstitute.sting.alignment.Aligner; import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.io.File; @@ -21,24 +22,23 @@ import net.sf.samtools.SAMFileReader; */ public class AlignerTestHarness { public static void main( String argv[] ) throws FileNotFoundException { - if( argv.length != 5 ) { - System.out.println("PerfectAlignerTestHarness "); + if( argv.length != 6 ) { + System.out.println("PerfectAlignerTestHarness "); System.exit(1); } File referenceFile = new File(argv[0]); File bwtFile = new File(argv[1]); File rbwtFile = new File(argv[2]); - File reverseSuffixArrayFile = new File(argv[3]); - File bamFile = new File(argv[4]); + File suffixArrayFile = new File(argv[3]); + File reverseSuffixArrayFile = new File(argv[4]); + File bamFile = new File(argv[5]); - align(referenceFile,bwtFile,rbwtFile,reverseSuffixArrayFile,bamFile); + align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile); } - private static void align(File referenceFile, File bwtFile, File rbwtFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { - BWT bwt = new BWTReader(bwtFile).read(); - - Aligner aligner = new BWAAligner(bwtFile,rbwtFile,reverseSuffixArrayFile); + private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { + Aligner aligner = new BWAAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); int count = 0; SAMFileReader reader = new SAMFileReader(bamFile); @@ -46,12 +46,31 @@ public class AlignerTestHarness { for(SAMRecord read: reader) { count++; - //if( count > 39 ) break; + //if( count > 25000 ) break; //if( count != 39 ) continue; //if( !read.getReadName().endsWith("1507:1636#0") ) // continue; - List alignments = aligner.align(read); + SAMRecord alignmentCleaned = null; + try { + alignmentCleaned = (SAMRecord)read.clone(); + } + catch( CloneNotSupportedException ex ) { + throw new StingException("SAMRecord clone not supported", ex); + } + + if( alignmentCleaned.getReadNegativeStrandFlag() ) + alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases())); + + alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); + alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); + alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); + alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); + + // Clear everything except flags pertaining to pairing and set 'unmapped' status to true. + alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); + + List alignments = aligner.align(alignmentCleaned); if(alignments.size() == 0 ) throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); @@ -59,6 +78,9 @@ public class AlignerTestHarness { System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions()); + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + throw new StingException("Read has been aligned in wrong direction"); + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) { IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases()); @@ -71,7 +93,7 @@ public class AlignerTestHarness { String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases()); int actualMismatches = 0; for( int i = 0; i < read.getReadLength(); i++ ) { - if( read.getReadBases()[i] != expectedRef.charAt(i) ) + if( read.getReadBases()[i] != alignedRef.charAt(i) ) actualMismatches++; } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 459362327..f05806059 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -30,6 +30,11 @@ public class BWAAligner implements Aligner { /** * Suffix array in the forward direction. */ + private SuffixArray forwardSuffixArray; + + /** + * Suffix array in the reverse direction. + */ private SuffixArray reverseSuffixArray; /** @@ -62,41 +67,47 @@ public class BWAAligner implements Aligner { */ public final int GAP_EXTENSION_PENALTY = 4; - public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) { + public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { forwardBWT = new BWTReader(forwardBWTFile).read(); reverseBWT = new BWTReader(reverseBWTFile).read(); + forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile).read(); reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile).read(); } public List align( SAMRecord read ) { - byte[] forwardBases = read.getReadBases(); - byte[] reverseBases = BaseUtils.simpleReverseComplement(forwardBases); + byte[] uncomplementedBases = read.getReadBases(); + byte[] complementedBases = BaseUtils.reverse(BaseUtils.simpleReverseComplement(uncomplementedBases)); - List forwardLowerBounds = LowerBound.create(forwardBases,forwardBWT); - List reverseLowerBounds = LowerBound.create(reverseBases,reverseBWT); + List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); + List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); - //for( int i = 0; i < forwardLowerBounds.size(); i++ ) - // System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i)); - //for( int i = 0; i < reverseLowerBounds.size(); i++ ) - // System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i)); + /* + for( int i = 0; i < forwardLowerBounds.size(); i++ ) + System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i)); + for( int i = 0; i < reverseLowerBounds.size(); i++ ) + System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i)); + */ PriorityQueue alignments = new PriorityQueue(); // Create a fictional initial alignment, with the position just off the end of the read, and the limits // set as the entire BWT. alignments.add(createSeedAlignment(forwardBWT)); - //alignments.add(createSeedAlignment(reverseBWT)); + alignments.add(createSeedAlignment(reverseBWT)); while(!alignments.isEmpty()) { BWAAlignment alignment = alignments.remove(); - byte[] bases = alignment.negativeStrand ? forwardBases : reverseBases; - BWT bwt = alignment.negativeStrand ? reverseBWT : forwardBWT; - List lowerBounds = alignment.negativeStrand ? forwardLowerBounds : reverseLowerBounds; + byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases; + BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT; + List lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds; // Done with this particular alignment. if(alignment.position == read.getReadLength()-1) { - alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1; + if( !alignment.isNegativeStrand() ) + alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1; + else + alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1; return Collections.singletonList(alignment); } @@ -211,6 +222,7 @@ public class BWAAligner implements Aligner { /** * Create new alignments representing a deletion a this point in the read. + * @param bwt source BWT for inferring deletion info. * @param alignment Alignment from which to derive the deletion. * @return New alignments reflecting all possible deletions. */