diff --git a/c/bwa/bwa_as_lib.sh b/c/bwa/bwa_as_lib.sh new file mode 100644 index 000000000..48e22c553 --- /dev/null +++ b/c/bwa/bwa_as_lib.sh @@ -0,0 +1,18 @@ +CFLAGS="-g -Wall -O2 -m64" +BWA_INCLUDE="/Users/mhanna/src/bwa-0.5.3" +BWA_SRC="/Users/mhanna/src/bwa-0.5.3" + +g++ $CFLAGS -c -I$BWA_INCLUDE -I/System/Library/Frameworks/JavaVM.framework/Headers org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp +g++ $CFLAGS -c -I$BWA_INCLUDE bwa_gateway.cpp -o bwa_gateway.o +gcc $CFLAGS -c $BWA_SRC/bntseq.c -o bntseq.o +gcc $CFLAGS -c $BWA_SRC/bwase.c -o bwase.o +gcc $CFLAGS -c $BWA_SRC/bwt.c -o bwt.o +gcc $CFLAGS -c $BWA_SRC/bwtaln.c -o bwtaln.o +gcc $CFLAGS -c $BWA_SRC/bwtgap.c -o bwtgap.o +gcc $CFLAGS -c $BWA_SRC/bwtio.c -o bwtio.o +gcc $CFLAGS -c $BWA_SRC/bwaseqio.c -o bwaseqio.o +gcc $CFLAGS -c $BWA_SRC/cs2nt.c -o cs2nt.o +gcc $CFLAGS -c $BWA_SRC/kstring.c -o kstring.o +gcc $CFLAGS -c $BWA_SRC/stdaln.c -o stdaln.o +gcc $CFLAGS -c $BWA_SRC/utils.c -o utils.o +g++ -dynamiclib -o libbwa.dylib org_broadinstitute_sting_alignment_bwa_BWACAligner.o bwa_gateway.o bntseq.o bwase.o bwt.o bwtaln.o bwtgap.o bwtio.o bwaseqio.o cs2nt.o kstring.o stdaln.o utils.o -framework JavaVM -lz \ No newline at end of file diff --git a/c/bwa/bwa_gateway.cpp b/c/bwa/bwa_gateway.cpp new file mode 100644 index 000000000..fdee16bf3 --- /dev/null +++ b/c/bwa/bwa_gateway.cpp @@ -0,0 +1,137 @@ +#include +#include + +#include "bwase.h" +#include "bwa_gateway.h" + +BWA::BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename) +{ + bns = bns_restore_core(ann_filename,amb_filename,pac_filename); + bwts[0] = bwt_restore_bwt(forward_bwt_filename); + bwt_restore_sa(forward_sa_filename, bwts[0]); + bwts[1] = bwt_restore_bwt(reverse_bwt_filename); + bwt_restore_sa(reverse_sa_filename, bwts[1]); + load_default_options(); +} + +BWA::~BWA() { + bwt_destroy(bwts[0]); + bwt_destroy(bwts[1]); +} + +void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments) +{ + bwa_seq_t sequence; + initialize_sequence(sequence); + copy_bases_into_sequence(sequence, bases, read_length, true); + + // Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln). + // This method will destroy the contents of seq and rseq. + bwa_cal_sa_reg_gap(0,bwts,1,&sequence,&options); + + // Translate suffix array indices into exactly how many alignments have been found. + bwa_aln2seq(sequence.n_aln,sequence.aln,&sequence); + + // Calculate and refine the position for each alignment. This position may be inaccurate + // if the read contains indels, etc. Refinement requires the original sequences in the proper order. + copy_bases_into_sequence(sequence, bases, read_length, false); + create_alignments(sequence, alignments, num_alignments); +} + +void BWA::load_default_options() +{ + options.s_mm = 3; + options.s_gapo = 11; + options.s_gape = 4; + options.mode = 3; + options.indel_end_skip = 5; + options.max_del_occ = 10; + options.max_entries = 2000000; + options.fnr = 0.04; + options.max_diff = -1; + options.max_gapo = 1; + options.max_gape = 6; + options.max_seed_diff = 2; + options.seed_len = 2147483647; + options.n_threads = 1; + options.max_top2 = 30; + options.trim_qual = 0; +} + +void BWA::initialize_sequence(bwa_seq_t& sequence) +{ + sequence.tid = -1; + sequence.name = 0; + sequence.qual = 0; + + sequence.seq = NULL; + sequence.rseq = NULL; + + sequence.cigar = 0; + sequence.n_cigar = NULL; +} + +void BWA::copy_bases_into_sequence(bwa_seq_t& sequence, const char* bases, unsigned read_length, bool reverse) +{ + // seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap + sequence.seq = new ubyte_t[read_length]; + sequence.rseq = new ubyte_t[read_length]; + for(unsigned i = 0; i < read_length; i++) sequence.seq[i] = nst_nt4_table[(unsigned)bases[i]]; + memcpy(sequence.rseq,sequence.seq,read_length); + if(reverse) { + seq_reverse(read_length,sequence.seq,0); + seq_reverse(read_length,sequence.rseq,1); + } + sequence.full_len = sequence.len = read_length; +} + +void BWA::create_alignments(bwa_seq_t& sequence, Alignment*& alignments, unsigned& num_alignments) { + num_alignments = 0; + for(unsigned i = 0; i < (unsigned)sequence.n_aln; i++) + num_alignments += (sequence.aln + i)->l - (sequence.aln + i)->k + 1; + + alignments = new Alignment[num_alignments]; + unsigned alignment_idx = 0; + + // backup existing alignment blocks. + bwt_aln1_t* alignment_blocks = sequence.aln; + int num_alignment_blocks = sequence.n_aln; + + for(unsigned alignment_block_idx = 0; alignment_block_idx < (unsigned)num_alignment_blocks; alignment_block_idx++) { + // Stub in a 'working' alignment block, so that only the desired alignment is local-aligned. + const bwt_aln1_t* alignment_block = alignment_blocks + alignment_block_idx; + bwt_aln1_t working_alignment_block = *alignment_block; + + // Loop through all alignments, aligning each one individually. + for(unsigned sa_idx = alignment_block->k; sa_idx <= alignment_block->l; sa_idx++) { + working_alignment_block.k = working_alignment_block.l = sa_idx; + sequence.aln = &working_alignment_block; + sequence.n_aln = 1; + + // Calculate the local alignment. + bwa_cal_pac_pos_core(bwts[0],bwts[1],&sequence,options.max_diff,options.fnr); + bwa_refine_gapped(bns, 1, &sequence, 0, NULL); + + // Copy the local alignment data into the alignment object. + Alignment& alignment = *(alignments + alignment_idx); + alignment.type = sequence.type; + bns_coor_pac2real(bns, sequence.pos, pos_end(&sequence) - sequence.pos, &alignment.contig); + alignment.pos = sequence.pos - bns->anns[alignment.contig].offset + 1; + alignment.negative_strand = sequence.strand; + alignment.cigar = sequence.cigar; + alignment.n_cigar = sequence.n_cigar; + alignment.mapQ = sequence.mapQ; + + alignment_idx++; + } + } + + sequence.aln = alignment_blocks; + sequence.n_aln = num_alignment_blocks; +} diff --git a/c/bwa/bwa_gateway.h b/c/bwa/bwa_gateway.h new file mode 100644 index 000000000..f41079cf6 --- /dev/null +++ b/c/bwa/bwa_gateway.h @@ -0,0 +1,48 @@ +#ifndef BWA_GATEWAY +#define BWA_GATEWAY + +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" + +class Alignment; + +class BWA { + private: + bntseq_t *bns; + bwt_t* bwts[2]; + gap_opt_t options; + + void load_default_options(); + void initialize_sequence(bwa_seq_t& sequence); + void copy_bases_into_sequence(bwa_seq_t& sequence, const char* bases, const unsigned read_length, const bool reverse); + void create_alignments(bwa_seq_t& sequence, Alignment*& alignments, unsigned& num_alignments); + + public: + BWA(const char* ann_filename, + const char* amb_filename, + const char* pac_filename, + const char* forward_bwt_filename, + const char* forward_sa_filename, + const char* reverse_bwt_filename, + const char* reverse_sa_filename); + ~BWA(); + void align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments); +}; + +class Alignment { + public: + uint32_t type; + int contig; + bwtint_t pos; + bool negative_strand; + + uint16_t *cigar; + int n_cigar; + + uint32_t mapQ; +}; + +#endif // BWA_GATEWAY diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp new file mode 100644 index 000000000..a5a68ee99 --- /dev/null +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp @@ -0,0 +1,120 @@ +#include +#include +#include + +#include "bntseq.h" +#include "bwt.h" +#include "bwtaln.h" +#include "bwa_gateway.h" +#include "org_broadinstitute_sting_alignment_bwa_BWACAligner.h" + +static jclass java_alignment_array_class = NULL; +static jclass java_alignment_class = NULL; +static jmethodID java_alignment_constructor = NULL; + +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_create(JNIEnv* env, + jobject instance, + jstring java_ann, + jstring java_amb, + jstring java_pac, + jstring java_forward_bwt, + jstring java_forward_sa, + jstring java_reverse_bwt, + jstring java_reverse_sa) +{ + const char* ann_filename = env->GetStringUTFChars(java_ann, JNI_FALSE); + const char* amb_filename = env->GetStringUTFChars(java_amb, JNI_FALSE); + const char* pac_filename = env->GetStringUTFChars(java_pac, JNI_FALSE); + const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt, JNI_FALSE); + const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa, JNI_FALSE); + const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt, JNI_FALSE); + const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa, JNI_FALSE); + + BWA* bwa = new BWA(ann_filename, + amb_filename, + pac_filename, + forward_bwt_filename, + forward_sa_filename, + reverse_bwt_filename, + reverse_sa_filename); + + env->ReleaseStringUTFChars(java_ann,ann_filename); + env->ReleaseStringUTFChars(java_amb,amb_filename); + env->ReleaseStringUTFChars(java_pac,pac_filename); + env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename); + env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename); + env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename); + env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename); + + // Cache the class object for an array of alignments. + java_alignment_array_class = env->FindClass("[Lorg/broadinstitute/sting/alignment/Alignment;"); + java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[I)V"); + + return (jlong)bwa; +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa) +{ + BWA* bwa = (BWA*)java_bwa; + delete bwa; +} + +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) { + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + + Alignment* alignments = NULL; + unsigned num_alignments = 1; + bwa->align((const char*)read_bases,read_length,alignments,num_alignments); + + jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL); + + for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { + Alignment& alignment = *(alignments + alignment_idx); + + unsigned cigar_length; + if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; + else if(!alignment.cigar) cigar_length = 1; + else cigar_length = alignment.n_cigar; + + jcharArray java_cigar_operators = env->NewCharArray(cigar_length); + jintArray java_cigar_lengths = env->NewIntArray(cigar_length); + + if(alignment.cigar) { + for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { + jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; + jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; + env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); + env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); + } + } + else { + if(alignment.type != BWA_TYPE_NO_MATCH) { + jchar cigar_operator = 'M'; + env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); + env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); + } + } + + jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[I)V"); + jobject java_alignment = env->NewObject(java_alignment_class, + java_alignment_constructor, + alignment.contig, + alignment.pos, + alignment.negative_strand, + alignment.mapQ, + java_cigar_operators, + java_cigar_lengths); + env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); + } + + delete[] alignments; + + env->ReleaseByteArrayElements(java_bases,read_bases,0); + + return java_alignments; +} diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h new file mode 100644 index 000000000..cf3bb7ebf --- /dev/null +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h @@ -0,0 +1,37 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_broadinstitute_sting_alignment_bwa_BWACAligner */ + +#ifndef _Included_org_broadinstitute_sting_alignment_bwa_BWACAligner +#define _Included_org_broadinstitute_sting_alignment_bwa_BWACAligner +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_broadinstitute_sting_alignment_bwa_BWACAligner + * Method: create + * Signature: (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_create + (JNIEnv *, jobject, jstring, jstring, jstring, jstring, jstring, jstring, jstring); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_BWACAligner + * Method: destroy + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_destroy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_BWACAligner + * Method: align + * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align + (JNIEnv *, jobject, jlong, jbyteArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index 8fd73e656..67857e6fa 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -1,27 +1,99 @@ package org.broadinstitute.sting.alignment; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.CigarElement; + /** * Represents an alignment of a read to a site in the reference genome. * * @author mhanna * @version 0.1 */ -public interface Alignment extends Comparable { +public class Alignment { + protected int contigIndex; + protected long alignmentStart; + protected boolean negativeStrand; + protected int mappingQuality; + + private char[] cigarOperators; + private int[] cigarLengths; + /** - * Is the given alignment on the reverse strand? - * @return True if the alignment is on the reverse strand. + * Gets the index of the given contig. + * @return the inde */ - public boolean isNegativeStrand(); + public int getContigIndex() { return contigIndex; } /** * Gets the starting position for the given alignment. * @return Starting position. */ - public long getAlignmentStart(); + public long getAlignmentStart() { return alignmentStart; } + + /** + * Is the given alignment on the reverse strand? + * @return True if the alignment is on the reverse strand. + */ + public boolean isNegativeStrand() { return negativeStrand; } /** * Gets the score of this alignment. * @return The score. */ - public int getScore(); + public int getMappingQuality() { return mappingQuality; } + + /** + * Gets the cigar for this alignment. + * @return sam-jdk formatted alignment. + */ + public Cigar getCigar() { + Cigar cigar = new Cigar(); + for(int i = 0; i < cigarOperators.length; i++) { + CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]); + cigar.add(new CigarElement(cigarLengths[i],operator)); + } + return cigar; + } + + /** + * Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately + * package-protected. + * @return + */ + public String getCigarString() { + Cigar cigar = getCigar(); + if(cigar.isEmpty()) return "*"; + + StringBuilder cigarString = new StringBuilder(); + for(CigarElement element: cigar.getCigarElements()) { + cigarString.append(element.getLength()); + cigarString.append(element.getOperator()); + } + return cigarString.toString(); + } + + /** + * Stub for inheritance. + */ + public Alignment() {} + + /** + * Create a new alignment object. + * @param contigIndex The contig to which this read aligned. + * @param alignmentStart The point within the contig to which this read aligned. + * @param negativeStrand Forward or reverse alignment of the given read. + * @param mappingQuality How good does BWA think this mapping is? + * @param cigarOperators The ordered operators in the cigar string. + * @param cigarLengths The lengths to which each operator applies. + */ + public Alignment(int contigIndex, int alignmentStart, boolean negativeStrand, int mappingQuality, + char[] cigarOperators, int[] cigarLengths) { + this.contigIndex = contigIndex; + this.alignmentStart = alignmentStart; + this.negativeStrand = negativeStrand; + this.mappingQuality = mappingQuality; + this.cigarOperators = cigarOperators; + this.cigarLengths = cigarLengths; + } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index fa3e7acd4..a47ffd459 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -107,9 +107,9 @@ public class BWAAligner implements Aligner { if( alignment.getScore() > bestScore + MISMATCH_PENALTY ) break; - Byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases; - BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT; - List lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds; + Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases; + BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT; + List lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds; // if z < D(i) then return {} int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions(); @@ -128,12 +128,12 @@ public class BWAAligner implements Aligner { BWAAlignment finalAlignment = alignment.clone(); if( finalAlignment.isNegativeStrand() ) - finalAlignment.alignmentStart = forwardSuffixArray.get(bwtIndex) + 1; + finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1); else { int sizeAlongReference = read.getReadLength() - finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) + finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION); - finalAlignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1; + finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1); } successfulMatches.add(finalAlignment); @@ -215,7 +215,7 @@ public class BWAAligner implements Aligner { */ private BWAAlignment createSeedAlignment(BWT bwt) { BWAAlignment seed = new BWAAlignment(this); - seed.negativeStrand = (bwt == forwardBWT); + seed.setNegativeStrand(bwt == forwardBWT); seed.position = -1; seed.loBound = 0; seed.hiBound = bwt.length(); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java index 10b558d60..19bbce0d4 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java @@ -11,7 +11,7 @@ import net.sf.samtools.Cigar; * @author mhanna * @version 0.1 */ -public class BWAAlignment implements Alignment, Cloneable { +public class BWAAlignment extends Alignment implements Cloneable { /** * Track the number of alignments that have been created. */ @@ -27,16 +27,6 @@ public class BWAAlignment implements Alignment, Cloneable { */ protected BWAAligner aligner; - /** - * Start of the final alignment. - */ - protected long alignmentStart; - - /** - * Is this match being treated as a negative or positive strand? - */ - protected boolean negativeStrand; - /** * The sequence of matches/mismatches/insertions/deletions. */ @@ -72,27 +62,19 @@ public class BWAAlignment implements Alignment, Cloneable { */ protected long hiBound; + protected void setAlignmentStart(long position) { + this.alignmentStart = position; + } + + protected void setNegativeStrand(boolean negativeStrand) { + this.negativeStrand = negativeStrand; + } + /** * Cache the score. */ private int score; - /** - * Gets the starting position for the given alignment. - * @return Starting position. - */ - public long getAlignmentStart() { - return alignmentStart; - } - - /** - * Is the given alignment on the reverse strand? - * @return True if the alignment is on the reverse strand. - */ - public boolean isNegativeStrand() { - return negativeStrand; - } - public Cigar getCigar() { return alignmentMatchSequence.convertToCigar(isNegativeStrand()); } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java new file mode 100644 index 000000000..1e3ace84d --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java @@ -0,0 +1,130 @@ +package org.broadinstitute.sting.alignment.bwa; + +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.alignment.Alignment; + +import java.io.File; + +/** + * An aligner using the BWA/C implementation. + * + * @author mhanna + * @version 0.1 + */ +public class BWACAligner { + static { + System.loadLibrary("bwa"); + } + + /** + * A pointer to the C++ object representing the BWA engine. + */ + private long thunkPointer = 0; + + /** + * Create a pointer to the BWA/C thunk. + * @param annFileName Name of the ann file? + * @param ambFileName Name of the amb file? + * @param pacFileName Packed representation of the forward reference. + * @param forwardBWTFileName Name of the file where the forward BWT is stored. + * @param forwardSAFileName Name of te file where the forward suffix array is stored. + * @param reverseBWTFileName Name of the file where the reverse BWT is stored. + * @param reverseSAFileName Name of the file where the reverse SA is stored. + * @return Pointer to the BWA/C thunk. + */ + protected native long create(String annFileName, + String ambFileName, + String pacFileName, + String forwardBWTFileName, + String forwardSAFileName, + String reverseBWTFileName, + String reverseSAFileName); + + /** + * Destroy the + * @param thunkPointer Pointer to the allocated thunk. + */ + protected native void destroy(long thunkPointer); + + public BWACAligner(String annFileName, + String ambFileName, + String pacFileName, + String forwardBWTFileName, + String forwardSAFileName, + String reverseBWTFileName, + String reverseSAFileName) { + if(thunkPointer != 0) + throw new StingException("BWA/C attempting to reinitialize."); + thunkPointer = create(annFileName, + ambFileName, + pacFileName, + forwardBWTFileName, + forwardSAFileName, + reverseBWTFileName, + reverseSAFileName); + } + + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close() { + if(thunkPointer == 0) + throw new StingException("BWA/C close attempted, but BWA/C was never properly initialized."); + destroy(thunkPointer); + } + + /** + * Align the given base array to the BWT. The base array should be in ASCII format. + * @param bases ASCII representation of byte array. + * @return an array of indices into the bwa. + */ + public Alignment[] align(byte[] bases) { + if(thunkPointer == 0) + throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized."); + return align(thunkPointer,bases); + } + + /** + * Caller to the . The base array should be in + * ASCII format. + * @param thunkPointer pointer to the C++ object managing BWA/C. + * @param bases ASCII representation of byte array. + */ + protected native Alignment[] align(long thunkPointer, byte[] bases); + + public static void main(String[] args) { + String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; + BWACAligner thunk = new BWACAligner(prefix + ".ann", + prefix + ".amb", + prefix + ".pac", + prefix + ".bwt", + prefix + ".sa", + prefix + ".rbwt", + prefix + ".rsa"); + + SAMFileReader reader = new SAMFileReader(new File("/Users/mhanna/reference/Ecoli/MV1994.bam")); + reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); + + int count = 0; + + for(SAMRecord read: reader) { + count++; + //if(count > 1) break; + Alignment[] alignments = thunk.align(read.getReadBases()); + System.out.printf("Read: %s: ", read.getReadName()); + for(Alignment alignment: alignments) + System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;", + alignment.getContigIndex(), + alignment.getAlignmentStart(), + alignment.isNegativeStrand(), + alignment.getMappingQuality(), + alignment.getCigarString()); + if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count); + System.out.printf("%n"); + } + + thunk.close(); + } +}