Initial version of BWA/C bindings. Still lots of squirrels roaming the code.
- Some cigar strings aren't right. - Memory leaks. - BWA codebase changes aren't committed to BWA tree. - Aligner interface butchered to support BWA/C-style alignments. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1928 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c4359bc340
commit
c9a3707cfd
|
|
@ -0,0 +1,18 @@
|
|||
CFLAGS="-g -Wall -O2 -m64"
|
||||
BWA_INCLUDE="/Users/mhanna/src/bwa-0.5.3"
|
||||
BWA_SRC="/Users/mhanna/src/bwa-0.5.3"
|
||||
|
||||
g++ $CFLAGS -c -I$BWA_INCLUDE -I/System/Library/Frameworks/JavaVM.framework/Headers org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp
|
||||
g++ $CFLAGS -c -I$BWA_INCLUDE bwa_gateway.cpp -o bwa_gateway.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bntseq.c -o bntseq.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwase.c -o bwase.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwt.c -o bwt.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwtaln.c -o bwtaln.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwtgap.c -o bwtgap.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwtio.c -o bwtio.o
|
||||
gcc $CFLAGS -c $BWA_SRC/bwaseqio.c -o bwaseqio.o
|
||||
gcc $CFLAGS -c $BWA_SRC/cs2nt.c -o cs2nt.o
|
||||
gcc $CFLAGS -c $BWA_SRC/kstring.c -o kstring.o
|
||||
gcc $CFLAGS -c $BWA_SRC/stdaln.c -o stdaln.o
|
||||
gcc $CFLAGS -c $BWA_SRC/utils.c -o utils.o
|
||||
g++ -dynamiclib -o libbwa.dylib org_broadinstitute_sting_alignment_bwa_BWACAligner.o bwa_gateway.o bntseq.o bwase.o bwt.o bwtaln.o bwtgap.o bwtio.o bwaseqio.o cs2nt.o kstring.o stdaln.o utils.o -framework JavaVM -lz
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "bwase.h"
|
||||
#include "bwa_gateway.h"
|
||||
|
||||
BWA::BWA(const char* ann_filename,
|
||||
const char* amb_filename,
|
||||
const char* pac_filename,
|
||||
const char* forward_bwt_filename,
|
||||
const char* forward_sa_filename,
|
||||
const char* reverse_bwt_filename,
|
||||
const char* reverse_sa_filename)
|
||||
{
|
||||
bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
|
||||
bwts[0] = bwt_restore_bwt(forward_bwt_filename);
|
||||
bwt_restore_sa(forward_sa_filename, bwts[0]);
|
||||
bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
|
||||
bwt_restore_sa(reverse_sa_filename, bwts[1]);
|
||||
load_default_options();
|
||||
}
|
||||
|
||||
BWA::~BWA() {
|
||||
bwt_destroy(bwts[0]);
|
||||
bwt_destroy(bwts[1]);
|
||||
}
|
||||
|
||||
void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments)
|
||||
{
|
||||
bwa_seq_t sequence;
|
||||
initialize_sequence(sequence);
|
||||
copy_bases_into_sequence(sequence, bases, read_length, true);
|
||||
|
||||
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
|
||||
// This method will destroy the contents of seq and rseq.
|
||||
bwa_cal_sa_reg_gap(0,bwts,1,&sequence,&options);
|
||||
|
||||
// Translate suffix array indices into exactly how many alignments have been found.
|
||||
bwa_aln2seq(sequence.n_aln,sequence.aln,&sequence);
|
||||
|
||||
// Calculate and refine the position for each alignment. This position may be inaccurate
|
||||
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
|
||||
copy_bases_into_sequence(sequence, bases, read_length, false);
|
||||
create_alignments(sequence, alignments, num_alignments);
|
||||
}
|
||||
|
||||
void BWA::load_default_options()
|
||||
{
|
||||
options.s_mm = 3;
|
||||
options.s_gapo = 11;
|
||||
options.s_gape = 4;
|
||||
options.mode = 3;
|
||||
options.indel_end_skip = 5;
|
||||
options.max_del_occ = 10;
|
||||
options.max_entries = 2000000;
|
||||
options.fnr = 0.04;
|
||||
options.max_diff = -1;
|
||||
options.max_gapo = 1;
|
||||
options.max_gape = 6;
|
||||
options.max_seed_diff = 2;
|
||||
options.seed_len = 2147483647;
|
||||
options.n_threads = 1;
|
||||
options.max_top2 = 30;
|
||||
options.trim_qual = 0;
|
||||
}
|
||||
|
||||
void BWA::initialize_sequence(bwa_seq_t& sequence)
|
||||
{
|
||||
sequence.tid = -1;
|
||||
sequence.name = 0;
|
||||
sequence.qual = 0;
|
||||
|
||||
sequence.seq = NULL;
|
||||
sequence.rseq = NULL;
|
||||
|
||||
sequence.cigar = 0;
|
||||
sequence.n_cigar = NULL;
|
||||
}
|
||||
|
||||
void BWA::copy_bases_into_sequence(bwa_seq_t& sequence, const char* bases, unsigned read_length, bool reverse)
|
||||
{
|
||||
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
|
||||
sequence.seq = new ubyte_t[read_length];
|
||||
sequence.rseq = new ubyte_t[read_length];
|
||||
for(unsigned i = 0; i < read_length; i++) sequence.seq[i] = nst_nt4_table[(unsigned)bases[i]];
|
||||
memcpy(sequence.rseq,sequence.seq,read_length);
|
||||
if(reverse) {
|
||||
seq_reverse(read_length,sequence.seq,0);
|
||||
seq_reverse(read_length,sequence.rseq,1);
|
||||
}
|
||||
sequence.full_len = sequence.len = read_length;
|
||||
}
|
||||
|
||||
void BWA::create_alignments(bwa_seq_t& sequence, Alignment*& alignments, unsigned& num_alignments) {
|
||||
num_alignments = 0;
|
||||
for(unsigned i = 0; i < (unsigned)sequence.n_aln; i++)
|
||||
num_alignments += (sequence.aln + i)->l - (sequence.aln + i)->k + 1;
|
||||
|
||||
alignments = new Alignment[num_alignments];
|
||||
unsigned alignment_idx = 0;
|
||||
|
||||
// backup existing alignment blocks.
|
||||
bwt_aln1_t* alignment_blocks = sequence.aln;
|
||||
int num_alignment_blocks = sequence.n_aln;
|
||||
|
||||
for(unsigned alignment_block_idx = 0; alignment_block_idx < (unsigned)num_alignment_blocks; alignment_block_idx++) {
|
||||
// Stub in a 'working' alignment block, so that only the desired alignment is local-aligned.
|
||||
const bwt_aln1_t* alignment_block = alignment_blocks + alignment_block_idx;
|
||||
bwt_aln1_t working_alignment_block = *alignment_block;
|
||||
|
||||
// Loop through all alignments, aligning each one individually.
|
||||
for(unsigned sa_idx = alignment_block->k; sa_idx <= alignment_block->l; sa_idx++) {
|
||||
working_alignment_block.k = working_alignment_block.l = sa_idx;
|
||||
sequence.aln = &working_alignment_block;
|
||||
sequence.n_aln = 1;
|
||||
|
||||
// Calculate the local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],&sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, &sequence, 0, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
alignment.type = sequence.type;
|
||||
bns_coor_pac2real(bns, sequence.pos, pos_end(&sequence) - sequence.pos, &alignment.contig);
|
||||
alignment.pos = sequence.pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence.strand;
|
||||
alignment.cigar = sequence.cigar;
|
||||
alignment.n_cigar = sequence.n_cigar;
|
||||
alignment.mapQ = sequence.mapQ;
|
||||
|
||||
alignment_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
sequence.aln = alignment_blocks;
|
||||
sequence.n_aln = num_alignment_blocks;
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
#ifndef BWA_GATEWAY
|
||||
#define BWA_GATEWAY
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "bwtaln.h"
|
||||
|
||||
class Alignment;
|
||||
|
||||
class BWA {
|
||||
private:
|
||||
bntseq_t *bns;
|
||||
bwt_t* bwts[2];
|
||||
gap_opt_t options;
|
||||
|
||||
void load_default_options();
|
||||
void initialize_sequence(bwa_seq_t& sequence);
|
||||
void copy_bases_into_sequence(bwa_seq_t& sequence, const char* bases, const unsigned read_length, const bool reverse);
|
||||
void create_alignments(bwa_seq_t& sequence, Alignment*& alignments, unsigned& num_alignments);
|
||||
|
||||
public:
|
||||
BWA(const char* ann_filename,
|
||||
const char* amb_filename,
|
||||
const char* pac_filename,
|
||||
const char* forward_bwt_filename,
|
||||
const char* forward_sa_filename,
|
||||
const char* reverse_bwt_filename,
|
||||
const char* reverse_sa_filename);
|
||||
~BWA();
|
||||
void align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments);
|
||||
};
|
||||
|
||||
class Alignment {
|
||||
public:
|
||||
uint32_t type;
|
||||
int contig;
|
||||
bwtint_t pos;
|
||||
bool negative_strand;
|
||||
|
||||
uint16_t *cigar;
|
||||
int n_cigar;
|
||||
|
||||
uint32_t mapQ;
|
||||
};
|
||||
|
||||
#endif // BWA_GATEWAY
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "bwtaln.h"
|
||||
#include "bwa_gateway.h"
|
||||
#include "org_broadinstitute_sting_alignment_bwa_BWACAligner.h"
|
||||
|
||||
static jclass java_alignment_array_class = NULL;
|
||||
static jclass java_alignment_class = NULL;
|
||||
static jmethodID java_alignment_constructor = NULL;
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_create(JNIEnv* env,
|
||||
jobject instance,
|
||||
jstring java_ann,
|
||||
jstring java_amb,
|
||||
jstring java_pac,
|
||||
jstring java_forward_bwt,
|
||||
jstring java_forward_sa,
|
||||
jstring java_reverse_bwt,
|
||||
jstring java_reverse_sa)
|
||||
{
|
||||
const char* ann_filename = env->GetStringUTFChars(java_ann, JNI_FALSE);
|
||||
const char* amb_filename = env->GetStringUTFChars(java_amb, JNI_FALSE);
|
||||
const char* pac_filename = env->GetStringUTFChars(java_pac, JNI_FALSE);
|
||||
const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt, JNI_FALSE);
|
||||
const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa, JNI_FALSE);
|
||||
const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt, JNI_FALSE);
|
||||
const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa, JNI_FALSE);
|
||||
|
||||
BWA* bwa = new BWA(ann_filename,
|
||||
amb_filename,
|
||||
pac_filename,
|
||||
forward_bwt_filename,
|
||||
forward_sa_filename,
|
||||
reverse_bwt_filename,
|
||||
reverse_sa_filename);
|
||||
|
||||
env->ReleaseStringUTFChars(java_ann,ann_filename);
|
||||
env->ReleaseStringUTFChars(java_amb,amb_filename);
|
||||
env->ReleaseStringUTFChars(java_pac,pac_filename);
|
||||
env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename);
|
||||
env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename);
|
||||
env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename);
|
||||
env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename);
|
||||
|
||||
// Cache the class object for an array of alignments.
|
||||
java_alignment_array_class = env->FindClass("[Lorg/broadinstitute/sting/alignment/Alignment;");
|
||||
java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[I)V");
|
||||
|
||||
return (jlong)bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
delete bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) {
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
|
||||
Alignment* alignments = NULL;
|
||||
unsigned num_alignments = 1;
|
||||
bwa->align((const char*)read_bases,read_length,alignments,num_alignments);
|
||||
|
||||
jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
|
||||
|
||||
for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) {
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
|
||||
unsigned cigar_length;
|
||||
if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
|
||||
else if(!alignment.cigar) cigar_length = 1;
|
||||
else cigar_length = alignment.n_cigar;
|
||||
|
||||
jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
|
||||
jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
|
||||
|
||||
if(alignment.cigar) {
|
||||
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
}
|
||||
}
|
||||
|
||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[I)V");
|
||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapQ,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths);
|
||||
env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
|
||||
}
|
||||
|
||||
delete[] alignments;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,0);
|
||||
|
||||
return java_alignments;
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class org_broadinstitute_sting_alignment_bwa_BWACAligner */
|
||||
|
||||
#ifndef _Included_org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
#define _Included_org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
* Method: create
|
||||
* Signature: (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)J
|
||||
*/
|
||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_create
|
||||
(JNIEnv *, jobject, jstring, jstring, jstring, jstring, jstring, jstring, jstring);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
* Method: destroy
|
||||
* Signature: (J)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_destroy
|
||||
(JNIEnv *, jobject, jlong);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
* Method: align
|
||||
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,27 +1,99 @@
|
|||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.CigarElement;
|
||||
|
||||
/**
|
||||
* Represents an alignment of a read to a site in the reference genome.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface Alignment extends Comparable<Alignment> {
|
||||
public class Alignment {
|
||||
protected int contigIndex;
|
||||
protected long alignmentStart;
|
||||
protected boolean negativeStrand;
|
||||
protected int mappingQuality;
|
||||
|
||||
private char[] cigarOperators;
|
||||
private int[] cigarLengths;
|
||||
|
||||
/**
|
||||
* Is the given alignment on the reverse strand?
|
||||
* @return True if the alignment is on the reverse strand.
|
||||
* Gets the index of the given contig.
|
||||
* @return the inde
|
||||
*/
|
||||
public boolean isNegativeStrand();
|
||||
public int getContigIndex() { return contigIndex; }
|
||||
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public long getAlignmentStart();
|
||||
public long getAlignmentStart() { return alignmentStart; }
|
||||
|
||||
/**
|
||||
* Is the given alignment on the reverse strand?
|
||||
* @return True if the alignment is on the reverse strand.
|
||||
*/
|
||||
public boolean isNegativeStrand() { return negativeStrand; }
|
||||
|
||||
/**
|
||||
* Gets the score of this alignment.
|
||||
* @return The score.
|
||||
*/
|
||||
public int getScore();
|
||||
public int getMappingQuality() { return mappingQuality; }
|
||||
|
||||
/**
|
||||
* Gets the cigar for this alignment.
|
||||
* @return sam-jdk formatted alignment.
|
||||
*/
|
||||
public Cigar getCigar() {
|
||||
Cigar cigar = new Cigar();
|
||||
for(int i = 0; i < cigarOperators.length; i++) {
|
||||
CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]);
|
||||
cigar.add(new CigarElement(cigarLengths[i],operator));
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately
|
||||
* package-protected.
|
||||
* @return
|
||||
*/
|
||||
public String getCigarString() {
|
||||
Cigar cigar = getCigar();
|
||||
if(cigar.isEmpty()) return "*";
|
||||
|
||||
StringBuilder cigarString = new StringBuilder();
|
||||
for(CigarElement element: cigar.getCigarElements()) {
|
||||
cigarString.append(element.getLength());
|
||||
cigarString.append(element.getOperator());
|
||||
}
|
||||
return cigarString.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stub for inheritance.
|
||||
*/
|
||||
public Alignment() {}
|
||||
|
||||
/**
|
||||
* Create a new alignment object.
|
||||
* @param contigIndex The contig to which this read aligned.
|
||||
* @param alignmentStart The point within the contig to which this read aligned.
|
||||
* @param negativeStrand Forward or reverse alignment of the given read.
|
||||
* @param mappingQuality How good does BWA think this mapping is?
|
||||
* @param cigarOperators The ordered operators in the cigar string.
|
||||
* @param cigarLengths The lengths to which each operator applies.
|
||||
*/
|
||||
public Alignment(int contigIndex, int alignmentStart, boolean negativeStrand, int mappingQuality,
|
||||
char[] cigarOperators, int[] cigarLengths) {
|
||||
this.contigIndex = contigIndex;
|
||||
this.alignmentStart = alignmentStart;
|
||||
this.negativeStrand = negativeStrand;
|
||||
this.mappingQuality = mappingQuality;
|
||||
this.cigarOperators = cigarOperators;
|
||||
this.cigarLengths = cigarLengths;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,9 +107,9 @@ public class BWAAligner implements Aligner {
|
|||
if( alignment.getScore() > bestScore + MISMATCH_PENALTY )
|
||||
break;
|
||||
|
||||
Byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases;
|
||||
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
||||
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
||||
Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases;
|
||||
BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT;
|
||||
List<LowerBound> lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds;
|
||||
|
||||
// if z < D(i) then return {}
|
||||
int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions();
|
||||
|
|
@ -128,12 +128,12 @@ public class BWAAligner implements Aligner {
|
|||
BWAAlignment finalAlignment = alignment.clone();
|
||||
|
||||
if( finalAlignment.isNegativeStrand() )
|
||||
finalAlignment.alignmentStart = forwardSuffixArray.get(bwtIndex) + 1;
|
||||
finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1);
|
||||
else {
|
||||
int sizeAlongReference = read.getReadLength() -
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) +
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
finalAlignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1;
|
||||
finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1);
|
||||
}
|
||||
|
||||
successfulMatches.add(finalAlignment);
|
||||
|
|
@ -215,7 +215,7 @@ public class BWAAligner implements Aligner {
|
|||
*/
|
||||
private BWAAlignment createSeedAlignment(BWT bwt) {
|
||||
BWAAlignment seed = new BWAAlignment(this);
|
||||
seed.negativeStrand = (bwt == forwardBWT);
|
||||
seed.setNegativeStrand(bwt == forwardBWT);
|
||||
seed.position = -1;
|
||||
seed.loBound = 0;
|
||||
seed.hiBound = bwt.length();
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import net.sf.samtools.Cigar;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAAlignment implements Alignment, Cloneable {
|
||||
public class BWAAlignment extends Alignment implements Cloneable {
|
||||
/**
|
||||
* Track the number of alignments that have been created.
|
||||
*/
|
||||
|
|
@ -27,16 +27,6 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
*/
|
||||
protected BWAAligner aligner;
|
||||
|
||||
/**
|
||||
* Start of the final alignment.
|
||||
*/
|
||||
protected long alignmentStart;
|
||||
|
||||
/**
|
||||
* Is this match being treated as a negative or positive strand?
|
||||
*/
|
||||
protected boolean negativeStrand;
|
||||
|
||||
/**
|
||||
* The sequence of matches/mismatches/insertions/deletions.
|
||||
*/
|
||||
|
|
@ -72,27 +62,19 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
*/
|
||||
protected long hiBound;
|
||||
|
||||
protected void setAlignmentStart(long position) {
|
||||
this.alignmentStart = position;
|
||||
}
|
||||
|
||||
protected void setNegativeStrand(boolean negativeStrand) {
|
||||
this.negativeStrand = negativeStrand;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache the score.
|
||||
*/
|
||||
private int score;
|
||||
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public long getAlignmentStart() {
|
||||
return alignmentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the given alignment on the reverse strand?
|
||||
* @return True if the alignment is on the reverse strand.
|
||||
*/
|
||||
public boolean isNegativeStrand() {
|
||||
return negativeStrand;
|
||||
}
|
||||
|
||||
public Cigar getCigar() {
|
||||
return alignmentMatchSequence.convertToCigar(isNegativeStrand());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,130 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* An aligner using the BWA/C implementation.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWACAligner {
|
||||
static {
|
||||
System.loadLibrary("bwa");
|
||||
}
|
||||
|
||||
/**
|
||||
* A pointer to the C++ object representing the BWA engine.
|
||||
*/
|
||||
private long thunkPointer = 0;
|
||||
|
||||
/**
|
||||
* Create a pointer to the BWA/C thunk.
|
||||
* @param annFileName Name of the ann file?
|
||||
* @param ambFileName Name of the amb file?
|
||||
* @param pacFileName Packed representation of the forward reference.
|
||||
* @param forwardBWTFileName Name of the file where the forward BWT is stored.
|
||||
* @param forwardSAFileName Name of te file where the forward suffix array is stored.
|
||||
* @param reverseBWTFileName Name of the file where the reverse BWT is stored.
|
||||
* @param reverseSAFileName Name of the file where the reverse SA is stored.
|
||||
* @return Pointer to the BWA/C thunk.
|
||||
*/
|
||||
protected native long create(String annFileName,
|
||||
String ambFileName,
|
||||
String pacFileName,
|
||||
String forwardBWTFileName,
|
||||
String forwardSAFileName,
|
||||
String reverseBWTFileName,
|
||||
String reverseSAFileName);
|
||||
|
||||
/**
|
||||
* Destroy the
|
||||
* @param thunkPointer Pointer to the allocated thunk.
|
||||
*/
|
||||
protected native void destroy(long thunkPointer);
|
||||
|
||||
public BWACAligner(String annFileName,
|
||||
String ambFileName,
|
||||
String pacFileName,
|
||||
String forwardBWTFileName,
|
||||
String forwardSAFileName,
|
||||
String reverseBWTFileName,
|
||||
String reverseSAFileName) {
|
||||
if(thunkPointer != 0)
|
||||
throw new StingException("BWA/C attempting to reinitialize.");
|
||||
thunkPointer = create(annFileName,
|
||||
ambFileName,
|
||||
pacFileName,
|
||||
forwardBWTFileName,
|
||||
forwardSAFileName,
|
||||
reverseBWTFileName,
|
||||
reverseSAFileName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
public void close() {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C close attempted, but BWA/C was never properly initialized.");
|
||||
destroy(thunkPointer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Align the given base array to the BWT. The base array should be in ASCII format.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return an array of indices into the bwa.
|
||||
*/
|
||||
public Alignment[] align(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
|
||||
return align(thunkPointer,bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Caller to the . The base array should be in
|
||||
* ASCII format.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
*/
|
||||
protected native Alignment[] align(long thunkPointer, byte[] bases);
|
||||
|
||||
public static void main(String[] args) {
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
BWACAligner thunk = new BWACAligner(prefix + ".ann",
|
||||
prefix + ".amb",
|
||||
prefix + ".pac",
|
||||
prefix + ".bwt",
|
||||
prefix + ".sa",
|
||||
prefix + ".rbwt",
|
||||
prefix + ".rsa");
|
||||
|
||||
SAMFileReader reader = new SAMFileReader(new File("/Users/mhanna/reference/Ecoli/MV1994.bam"));
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
int count = 0;
|
||||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
//if(count > 1) break;
|
||||
Alignment[] alignments = thunk.align(read.getReadBases());
|
||||
System.out.printf("Read: %s: ", read.getReadName());
|
||||
for(Alignment alignment: alignments)
|
||||
System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;",
|
||||
alignment.getContigIndex(),
|
||||
alignment.getAlignmentStart(),
|
||||
alignment.isNegativeStrand(),
|
||||
alignment.getMappingQuality(),
|
||||
alignment.getCigarString());
|
||||
if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count);
|
||||
System.out.printf("%n");
|
||||
}
|
||||
|
||||
thunk.close();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue