Checkpoint. Add first phase of single alignment interface.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2094 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
306f4624c6
commit
a972b2769f
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
|
||||
export BWA_HOME="/humgen/gsa-hphome1/hanna/src/bwa-0.5.3"
|
||||
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa"
|
||||
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
|
||||
export TARGET_LIB="libbwa.so"
|
||||
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
|
||||
|
|
|
|||
|
|
@ -36,22 +36,114 @@ BWA::~BWA() {
|
|||
bwt_destroy(bwts[1]);
|
||||
}
|
||||
|
||||
void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments)
|
||||
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence();
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
|
||||
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
|
||||
// Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
|
||||
// This method will destroy the contents of seq and rseq.
|
||||
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||
|
||||
// Translate suffix array indices into exactly how many alignments have been found.
|
||||
paths = new bwt_aln1_t[sequence->n_aln];
|
||||
memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
|
||||
num_paths = sequence->n_aln;
|
||||
|
||||
// Call aln2seq to initialize the type of match present.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
best_path_count = sequence->c1;
|
||||
second_best_path_count = sequence->c2;
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
||||
void BWA::generate_alignments_from_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t* paths,
|
||||
const unsigned num_paths,
|
||||
const unsigned best_count,
|
||||
const unsigned second_best_count,
|
||||
Alignment*& alignments,
|
||||
unsigned& num_alignments)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence();
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
|
||||
sequence->aln = paths;
|
||||
sequence->n_aln = num_paths;
|
||||
|
||||
// (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
|
||||
// Calculate and refine the position for each alignment. This position may be inaccurate
|
||||
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
create_alignments(sequence, alignments, num_alignments);
|
||||
// But overwrite key parts of the sequence in case the user passed back only a smaller subset
|
||||
// of the paths.
|
||||
sequence->c1 = best_count;
|
||||
sequence->c2 = second_best_count;
|
||||
sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
|
||||
|
||||
num_alignments = 0;
|
||||
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
|
||||
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
|
||||
|
||||
alignments = new Alignment[num_alignments];
|
||||
unsigned alignment_idx = 0;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
// Stub in a 'working' path, so that only the desired alignment is local-aligned.
|
||||
const bwt_aln1_t* path = paths + path_idx;
|
||||
bwt_aln1_t working_path = *path;
|
||||
|
||||
// Loop through all alignments, aligning each one individually.
|
||||
for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
|
||||
working_path.k = working_path.l = sa_idx;
|
||||
sequence->aln = &working_path;
|
||||
sequence->n_aln = 1;
|
||||
|
||||
sequence->sa = sa_idx;
|
||||
sequence->strand = path->a;
|
||||
sequence->score = path->score;
|
||||
|
||||
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
|
||||
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
|
||||
if(alignment_idx > 0)
|
||||
seq_reverse(sequence->len, sequence->seq, 0);
|
||||
|
||||
// Calculate the local coordinate and local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
|
||||
// Populate basic path info
|
||||
alignment.num_mismatches = sequence->n_mm;
|
||||
alignment.num_gap_opens = sequence->n_gapo;
|
||||
alignment.num_gap_extensions = sequence->n_gape;
|
||||
alignment.num_best = sequence->c1;
|
||||
alignment.num_second_best = sequence->c2;
|
||||
|
||||
alignment.type = sequence->type;
|
||||
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence->strand;
|
||||
alignment.mapping_quality = sequence->mapQ;
|
||||
|
||||
alignment.cigar = NULL;
|
||||
if(sequence->cigar) {
|
||||
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||
}
|
||||
alignment.n_cigar = sequence->n_cigar;
|
||||
|
||||
delete[] sequence->md;
|
||||
sequence->md = NULL;
|
||||
|
||||
alignment_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
sequence->aln = NULL;
|
||||
sequence->n_aln = 0;
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
|
@ -132,63 +224,3 @@ void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const
|
|||
|
||||
sequence->full_len = sequence->len = read_length;
|
||||
}
|
||||
|
||||
void BWA::create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigned& num_alignments) {
|
||||
num_alignments = 0;
|
||||
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
|
||||
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
|
||||
|
||||
alignments = new Alignment[num_alignments];
|
||||
unsigned alignment_idx = 0;
|
||||
|
||||
// backup existing alignment blocks.
|
||||
bwt_aln1_t* alignment_blocks = sequence->aln;
|
||||
int num_alignment_blocks = sequence->n_aln;
|
||||
|
||||
for(unsigned alignment_block_idx = 0; alignment_block_idx < (unsigned)num_alignment_blocks; alignment_block_idx++) {
|
||||
// Stub in a 'working' alignment block, so that only the desired alignment is local-aligned.
|
||||
const bwt_aln1_t* alignment_block = alignment_blocks + alignment_block_idx;
|
||||
bwt_aln1_t working_alignment_block = *alignment_block;
|
||||
|
||||
// Loop through all alignments, aligning each one individually.
|
||||
for(unsigned sa_idx = alignment_block->k; sa_idx <= alignment_block->l; sa_idx++) {
|
||||
working_alignment_block.k = working_alignment_block.l = sa_idx;
|
||||
sequence->aln = &working_alignment_block;
|
||||
sequence->n_aln = 1;
|
||||
|
||||
sequence->sa = sa_idx;
|
||||
sequence->strand = alignment_block->a;
|
||||
sequence->score = alignment_block->score;
|
||||
|
||||
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
|
||||
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
|
||||
if(alignment_idx > 0)
|
||||
seq_reverse(sequence->len, sequence->seq, 0);
|
||||
|
||||
// Calculate the local coordinate and local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
alignment.type = sequence->type;
|
||||
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence->strand;
|
||||
alignment.mapQ = sequence->mapQ;
|
||||
|
||||
alignment.cigar = NULL;
|
||||
if(sequence->cigar) {
|
||||
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||
}
|
||||
alignment.n_cigar = sequence->n_cigar;
|
||||
|
||||
alignment_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
// Restore original alignment blocks.
|
||||
sequence->aln = alignment_blocks;
|
||||
sequence->n_aln = num_alignment_blocks;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ class BWA {
|
|||
void load_default_options();
|
||||
bwa_seq_t* create_sequence();
|
||||
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
|
||||
void create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigned& num_alignments);
|
||||
|
||||
public:
|
||||
BWA(const char* ann_filename,
|
||||
|
|
@ -41,7 +40,20 @@ class BWA {
|
|||
void set_gap_extension_penalty(int penalty);
|
||||
|
||||
// Perform the alignment
|
||||
void align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments);
|
||||
void find_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t*& paths,
|
||||
unsigned& num_paths,
|
||||
unsigned& best_path_count,
|
||||
unsigned& second_best_path_count);
|
||||
void generate_alignments_from_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t* paths,
|
||||
const unsigned num_paths,
|
||||
const unsigned best_count,
|
||||
const unsigned second_best_count,
|
||||
Alignment*& alignments,
|
||||
unsigned& num_alignments);
|
||||
};
|
||||
|
||||
class Alignment {
|
||||
|
|
@ -50,11 +62,16 @@ class Alignment {
|
|||
int contig;
|
||||
bwtint_t pos;
|
||||
bool negative_strand;
|
||||
uint32_t mapping_quality;
|
||||
|
||||
uint16_t *cigar;
|
||||
int n_cigar;
|
||||
|
||||
uint32_t mapQ;
|
||||
uint8_t num_mismatches;
|
||||
uint8_t num_gap_opens;
|
||||
uint8_t num_gap_extensions;
|
||||
uint32_t num_best;
|
||||
uint32_t num_second_best;
|
||||
};
|
||||
|
||||
#endif // BWA_GATEWAY
|
||||
|
|
|
|||
|
|
@ -95,7 +95,63 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
|||
delete bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getAlignments(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases)
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
bwt_aln1_t* paths = NULL;
|
||||
unsigned num_paths = 0;
|
||||
|
||||
unsigned best_path_count, second_best_path_count;
|
||||
bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count);
|
||||
|
||||
jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL);
|
||||
if(java_paths == NULL) return NULL;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
bwt_aln1_t& path = *(paths + path_idx);
|
||||
|
||||
jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath");
|
||||
if(java_path_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_path_constructor = env->GetMethodID(java_path_class, "<init>", "(IIIZJJIII)V");
|
||||
if(java_path_constructor == NULL) return NULL;
|
||||
|
||||
// Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints.
|
||||
jobject java_path = env->NewObject(java_path_class,
|
||||
java_path_constructor,
|
||||
path.n_mm,
|
||||
path.n_gapo,
|
||||
path.n_gape,
|
||||
path.a,
|
||||
(jlong)path.k,
|
||||
(jlong)path.l,
|
||||
path.score,
|
||||
best_path_count,
|
||||
second_best_path_count);
|
||||
if(java_path == NULL) return NULL;
|
||||
|
||||
env->SetObjectArrayElement(java_paths,path_idx,java_path);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
env->DeleteLocalRef(java_path_class);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
delete[] paths;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
return env->ExceptionCheck() ? NULL : java_paths;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
|
|
@ -105,10 +161,67 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
const jsize num_paths = env->GetArrayLength(java_paths);
|
||||
bwt_aln1_t* paths = new bwt_aln1_t[num_paths];
|
||||
unsigned best_count = 0, second_best_count = 0;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
jobject java_path = env->GetObjectArrayElement(java_paths,path_idx);
|
||||
jclass java_path_class = env->GetObjectClass(java_path);
|
||||
if(java_path_class == NULL) return NULL;
|
||||
|
||||
bwt_aln1_t& path = *(paths + path_idx);
|
||||
|
||||
jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I");
|
||||
if(mismatches_field == NULL) return NULL;
|
||||
path.n_mm = env->GetIntField(java_path,mismatches_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I");
|
||||
if(gap_opens_field == NULL) return NULL;
|
||||
path.n_gapo = env->GetIntField(java_path,gap_opens_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I");
|
||||
if(gap_extensions_field == NULL) return NULL;
|
||||
path.n_gape = env->GetIntField(java_path,gap_extensions_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z");
|
||||
if(negative_strand_field == NULL) return NULL;
|
||||
path.a = env->GetBooleanField(java_path,negative_strand_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID k_field = env->GetFieldID(java_path_class, "k", "J");
|
||||
if(k_field == NULL) return NULL;
|
||||
path.k = env->GetLongField(java_path,k_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID l_field = env->GetFieldID(java_path_class, "l", "J");
|
||||
if(l_field == NULL) return NULL;
|
||||
path.l = env->GetLongField(java_path,l_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID score_field = env->GetFieldID(java_path_class, "score", "I");
|
||||
if(score_field == NULL) return NULL;
|
||||
path.score = env->GetIntField(java_path,score_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I");
|
||||
if(best_count_field == NULL) return NULL;
|
||||
best_count = env->GetIntField(java_path,best_count_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I");
|
||||
if(second_best_count_field == NULL) return NULL;
|
||||
second_best_count = env->GetIntField(java_path,second_best_count_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
Alignment* alignments = NULL;
|
||||
unsigned num_alignments = 0;
|
||||
|
||||
bwa->align((const char*)read_bases,read_length,alignments,num_alignments);
|
||||
bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments);
|
||||
num_alignments = 0;
|
||||
|
||||
jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
|
||||
if(java_alignments == NULL) return NULL;
|
||||
|
|
@ -128,39 +241,44 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
|
||||
if(alignment.cigar) {
|
||||
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
if(java_alignment_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[I)V");
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IIIIII)V");
|
||||
if(java_alignment_constructor == NULL) return NULL;
|
||||
|
||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapQ,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths);
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapping_quality,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths,
|
||||
alignment.num_mismatches,
|
||||
alignment.num_gap_opens,
|
||||
alignment.num_gap_extensions,
|
||||
alignment.num_best,
|
||||
alignment.num_second_best);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
|
||||
delete[] alignment.cigar;
|
||||
|
|
@ -173,6 +291,7 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
}
|
||||
|
||||
delete[] alignments;
|
||||
delete[] paths;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,20 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
|||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: getAlignments
|
||||
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
* Method: getPaths
|
||||
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getAlignments
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: convertPathsToAlignments
|
||||
* Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
|
||||
(JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -16,8 +16,14 @@ public class Alignment {
|
|||
protected boolean negativeStrand;
|
||||
protected int mappingQuality;
|
||||
|
||||
private char[] cigarOperators;
|
||||
private int[] cigarLengths;
|
||||
protected char[] cigarOperators;
|
||||
protected int[] cigarLengths;
|
||||
|
||||
protected int numMismatches;
|
||||
protected int numGapOpens;
|
||||
protected int numGapExtensions;
|
||||
protected int bestCount;
|
||||
protected int secondBestCount;
|
||||
|
||||
/**
|
||||
* Gets the index of the given contig.
|
||||
|
|
@ -43,6 +49,36 @@ public class Alignment {
|
|||
*/
|
||||
public int getMappingQuality() { return mappingQuality; }
|
||||
|
||||
/**
|
||||
* Gets the number of mismatches in the read.
|
||||
* @return Number of mismatches.
|
||||
*/
|
||||
public int getNumMismatches() { return numMismatches; }
|
||||
|
||||
/**
|
||||
* Get the number of gap opens.
|
||||
* @return Number of gap opens.
|
||||
*/
|
||||
public int getNumGapOpens() { return numGapOpens; }
|
||||
|
||||
/**
|
||||
* Get the number of gap extensions.
|
||||
* @return Number of gap extensions.
|
||||
*/
|
||||
public int getNumGapExtensions() { return numGapExtensions; }
|
||||
|
||||
/**
|
||||
* Get the number of best alignments.
|
||||
* @return Number of top scoring alignments.
|
||||
*/
|
||||
public int getBestCount() { return bestCount; }
|
||||
|
||||
/**
|
||||
* Get the number of second best alignments.
|
||||
* @return Number of second best scoring alignments.
|
||||
*/
|
||||
public int getSecondBestCount() { return secondBestCount; }
|
||||
|
||||
/**
|
||||
* Gets the cigar for this alignment.
|
||||
* @return sam-jdk formatted alignment.
|
||||
|
|
@ -87,13 +123,27 @@ public class Alignment {
|
|||
* @param cigarOperators The ordered operators in the cigar string.
|
||||
* @param cigarLengths The lengths to which each operator applies.
|
||||
*/
|
||||
public Alignment(int contigIndex, int alignmentStart, boolean negativeStrand, int mappingQuality,
|
||||
char[] cigarOperators, int[] cigarLengths) {
|
||||
public Alignment(int contigIndex,
|
||||
int alignmentStart,
|
||||
boolean negativeStrand,
|
||||
int mappingQuality,
|
||||
char[] cigarOperators,
|
||||
int[] cigarLengths,
|
||||
int numMismatches,
|
||||
int numGapOpens,
|
||||
int numGapExtensions,
|
||||
int bestCount,
|
||||
int secondBestCount) {
|
||||
this.contigIndex = contigIndex;
|
||||
this.alignmentStart = alignmentStart;
|
||||
this.negativeStrand = negativeStrand;
|
||||
this.mappingQuality = mappingQuality;
|
||||
this.cigarOperators = cigarOperators;
|
||||
this.cigarLengths = cigarLengths;
|
||||
this.numMismatches = numMismatches;
|
||||
this.numGapOpens = numGapOpens;
|
||||
this.numGapExtensions = numGapExtensions;
|
||||
this.bestCount = bestCount;
|
||||
this.secondBestCount = secondBestCount;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,9 +6,14 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWAPath;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/**
|
||||
* Validates alignments against existing reads.
|
||||
|
|
@ -23,6 +28,8 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
|
||||
//SortedMap<Integer,Integer> alignmentCounts = new TreeMap<Integer,Integer>();
|
||||
|
||||
/**
|
||||
* The instance used to generate alignments.
|
||||
*/
|
||||
|
|
@ -48,7 +55,7 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
@Override
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return true;
|
||||
//return read.getReadName().contains("SL-XBC:1:61:1719:1512#0");
|
||||
//return read.getReadName().contains("SL-XBC:1:13:1296:1410#0");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -66,6 +73,15 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
boolean matches = true;
|
||||
Iterator<Alignment[]> alignments = aligner.getAlignments(bases);
|
||||
/*
|
||||
BWAPath[] paths = aligner.getPaths(bases);
|
||||
int bestCount = 0;
|
||||
if(paths.length > 0) bestCount = paths[0].bestCount;
|
||||
if(alignmentCounts.containsKey(bestCount))
|
||||
alignmentCounts.put(bestCount,alignmentCounts.get(bestCount)+1);
|
||||
else
|
||||
alignmentCounts.put(bestCount,1);
|
||||
*/
|
||||
|
||||
if(!alignments.hasNext()) {
|
||||
matches = read.getReadUnmappedFlag();
|
||||
|
|
@ -102,7 +118,9 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
|
||||
}
|
||||
|
||||
if(++count % 10000 == 0) logger.info(String.format("Processed %d reads", count));
|
||||
if(++count % 10000 == 0) {
|
||||
logger.info(String.format("Processed %d reads", count));
|
||||
}
|
||||
|
||||
//logger.info(String.format("validated read %s", read.getReadName()));
|
||||
|
||||
|
|
@ -135,6 +153,18 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
|
||||
/*
|
||||
try {
|
||||
PrintWriter output = new PrintWriter("GATK_distribution.out");
|
||||
for(SortedMap.Entry<Integer,Integer> results: alignmentCounts.entrySet())
|
||||
output.printf("%d\t%d%n", results.getKey(), results.getValue());
|
||||
output.close();
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new StingException("Unable to open output file");
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,8 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An aligner using the BWA/C implementation.
|
||||
|
|
@ -34,7 +31,7 @@ public class BWACAligner {
|
|||
protected native long create(BWACConfiguration configuration);
|
||||
|
||||
/**
|
||||
* Destroy the
|
||||
* Destroy the BWA/C thunk.
|
||||
* @param thunkPointer Pointer to the allocated thunk.
|
||||
*/
|
||||
protected native void destroy(long thunkPointer);
|
||||
|
|
@ -59,8 +56,8 @@ public class BWACAligner {
|
|||
* @param bases List of bases.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterator<Alignment[]> getAlignments(byte[] bases) {
|
||||
final Alignment[] alignments = getAllAlignments(bases);
|
||||
public Iterator<Alignment[]> getAlignments(final byte[] bases) {
|
||||
final BWAPath[] paths = getPaths(bases);
|
||||
return new Iterator<Alignment[]>() {
|
||||
/**
|
||||
* The last position accessed.
|
||||
|
|
@ -71,19 +68,19 @@ public class BWACAligner {
|
|||
* Whether all alignments have been seen based on the current position.
|
||||
* @return True if any more alignments are pending. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() { return position < alignments.length; }
|
||||
public boolean hasNext() { return position < paths.length; }
|
||||
|
||||
/**
|
||||
* Return the next cross-section of alignments, based on mapping quality.
|
||||
* @return Array of the next set of alignments of a given mapping quality.
|
||||
*/
|
||||
public Alignment[] next() {
|
||||
if(position >= alignments.length)
|
||||
if(position >= paths.length)
|
||||
throw new UnsupportedOperationException("Out of alignments to return.");
|
||||
int mapQ = alignments[position].getMappingQuality();
|
||||
int score = paths[position].score;
|
||||
int startingPosition = position;
|
||||
while(position < alignments.length && alignments[position].getMappingQuality() == mapQ) position++;
|
||||
return Arrays.copyOfRange(alignments,startingPosition,position);
|
||||
while(position < paths.length && paths[position].score == score) position++;
|
||||
return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -93,19 +90,6 @@ public class BWACAligner {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Align the given base array to the BWT. The base array should be in ASCII format.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return an array of indices into the bwa.
|
||||
*/
|
||||
public Alignment[] getAllAlignments(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
|
||||
Alignment[] alignments = getAlignments(thunkPointer,bases);
|
||||
Arrays.sort(alignments,new AlignmentMapQComparator());
|
||||
return alignments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a iterator of aligned reads, batched by mapping quality.
|
||||
* @param read Read to align.
|
||||
|
|
@ -182,28 +166,56 @@ public class BWACAligner {
|
|||
}
|
||||
|
||||
/**
|
||||
* Caller to the . The base array should be in
|
||||
* ASCII format.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* Align the given base array to the BWT. The base array should be in ASCII format.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return an array of indices into the bwa.
|
||||
*/
|
||||
protected native Alignment[] getAlignments(long thunkPointer, byte[] bases);
|
||||
public Alignment[] getAllAlignments(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C align attempted, but BWA/C is not properly initialized.");
|
||||
BWAPath[] paths = getPaths(bases);
|
||||
return convertPathsToAlignments(thunkPointer,bases,paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two alignments based on their mapping quality. The one with the highest mapping quality comes FIRST.
|
||||
* Get the paths associated with the given base string.
|
||||
* @param bases List of bases.
|
||||
* @return A set of paths through the BWA.
|
||||
*/
|
||||
private class AlignmentMapQComparator implements Comparator<Alignment> {
|
||||
/**
|
||||
* Compares two alignments based on their score. If lhs has a higher mapping quality than
|
||||
* rhs, the score will be negative.
|
||||
* @param lhs Left-hand side for comparison.
|
||||
* @param rhs Right-hand side for comparison.
|
||||
* @return Negative value if lhs' score is highe
|
||||
*/
|
||||
public int compare(Alignment lhs, Alignment rhs) {
|
||||
// NOTE: this strategy works because Integer.MAX_VALUE, Integer.MIN_VALUE >> than valid range of mapping
|
||||
// qualities.
|
||||
return rhs.getMappingQuality() - lhs.getMappingQuality();
|
||||
}
|
||||
public BWAPath[] getPaths(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
|
||||
BWAPath[] paths = getPaths(thunkPointer,bases);
|
||||
return paths;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @param paths Paths through the current BWT.
|
||||
* @return A list of alignments.
|
||||
*/
|
||||
protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized.");
|
||||
return convertPathsToAlignments(thunkPointer,bases,paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return A list of paths through the specified BWT.
|
||||
*/
|
||||
protected native BWAPath[] getPaths(long thunkPointer, byte[] bases);
|
||||
|
||||
/**
|
||||
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||
* Call this method's convertPathsToAlignments() wrapper (above) instead.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @param paths Paths through the current BWT.
|
||||
* @return A list of alignments.
|
||||
*/
|
||||
protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment.bwa.c;
|
||||
|
||||
/**
|
||||
* Models a BWA path.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAPath {
|
||||
/**
|
||||
* Number of mismatches encountered along this path.
|
||||
*/
|
||||
public final int numMismatches;
|
||||
|
||||
/**
|
||||
* Number of gap opens encountered along this path.
|
||||
*/
|
||||
public final int numGapOpens;
|
||||
|
||||
/**
|
||||
* Number of gap extensions along this path.
|
||||
*/
|
||||
public final int numGapExtensions;
|
||||
|
||||
/**
|
||||
* Whether this alignment was found on the positive or negative strand.
|
||||
*/
|
||||
public final boolean negativeStrand;
|
||||
|
||||
/**
|
||||
* Starting coordinate in the BWT.
|
||||
*/
|
||||
public final long k;
|
||||
|
||||
/**
|
||||
* Ending coordinate in the BWT.
|
||||
*/
|
||||
public final long l;
|
||||
|
||||
/**
|
||||
* The score of this path.
|
||||
*/
|
||||
public final int score;
|
||||
|
||||
/**
|
||||
* The number of best alignments seen along this path.
|
||||
*/
|
||||
public final int bestCount;
|
||||
|
||||
/**
|
||||
* The number of second best alignments seen along this path.
|
||||
*/
|
||||
public final int secondBestCount;
|
||||
|
||||
/**
|
||||
* Create a new path with the given attributes.
|
||||
* @param numMismatches Number of mismatches along path.
|
||||
* @param numGapOpens Number of gap opens along path.
|
||||
* @param numGapExtensions Number of gap extensions along path.
|
||||
* @param k Index to first coordinate within BWT.
|
||||
* @param l Index to last coordinate within BWT.
|
||||
* @param score Score of this alignment. Not the mapping quality.
|
||||
*/
|
||||
public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) {
|
||||
this.numMismatches = numMismatches;
|
||||
this.numGapOpens = numGapOpens;
|
||||
this.numGapExtensions = numGapExtensions;
|
||||
this.negativeStrand = negativeStrand;
|
||||
this.k = k;
|
||||
this.l = l;
|
||||
this.score = score;
|
||||
this.bestCount = bestCount;
|
||||
this.secondBestCount = secondBestCount;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue