BWA bindings and tests moved to public (was required for ValidationAmplicons)
Integration tests for ValidationAmplicons. New argument to disable BWA, lowercase letters only for repetitiveness instead.
This commit is contained in:
parent
07e716d23a
commit
92c7cfa1c8
|
|
@ -0,0 +1,70 @@
|
|||
#include "MainTools.h"
|
||||
#include "Basevector.h"
|
||||
#include "lookup/LookAlign.h"
|
||||
#include "lookup/SerialQltout.h"
|
||||
|
||||
unsigned int MatchingEnd(look_align &la, vecbasevector &candidates, vecbasevector &ref) {
|
||||
//la.PrintParseable(cout);
|
||||
|
||||
for (int i = 0; i < candidates.size(); i++) {
|
||||
look_align newla = la;
|
||||
|
||||
if (newla.rc1) { candidates[i].ReverseComplement(); }
|
||||
newla.ResetFromAlign(newla.a, candidates[i], ref[la.target_id]);
|
||||
|
||||
//newla.PrintParseable(cout, &candidates[i], &ref[newla.target_id]);
|
||||
//cout << newla.Errors() << " " << la.Errors() << endl;
|
||||
|
||||
if (newla.Errors() == la.Errors()) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
//FatalErr("Query id " + ToString(la.query_id) + " had no matches.");
|
||||
|
||||
return candidates.size() + 1;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
RunTime();
|
||||
|
||||
BeginCommandArguments;
|
||||
CommandArgument_String(ALIGNS);
|
||||
CommandArgument_String(FASTB_END_1);
|
||||
CommandArgument_String(FASTB_END_2);
|
||||
CommandArgument_String(REFERENCE);
|
||||
|
||||
CommandArgument_String(ALIGNS_END_1_OUT);
|
||||
CommandArgument_String(ALIGNS_END_2_OUT);
|
||||
EndCommandArguments;
|
||||
|
||||
vecbasevector ref(REFERENCE);
|
||||
vecbasevector reads1(FASTB_END_1);
|
||||
vecbasevector reads2(FASTB_END_2);
|
||||
|
||||
ofstream aligns1stream(ALIGNS_END_1_OUT.c_str());
|
||||
ofstream aligns2stream(ALIGNS_END_2_OUT.c_str());
|
||||
|
||||
basevector bv;
|
||||
|
||||
SerialQltout sqltout(ALIGNS);
|
||||
look_align la;
|
||||
while (sqltout.Next(la)) {
|
||||
vecbasevector candidates(2);
|
||||
candidates[0] = reads1[la.query_id];
|
||||
candidates[1] = reads2[la.query_id];
|
||||
|
||||
unsigned int matchingend = MatchingEnd(la, candidates, ref);
|
||||
if (matchingend < 2) {
|
||||
bv = (matchingend == 0) ? reads1[la.query_id] : reads2[la.query_id];
|
||||
|
||||
//la.PrintParseable(cout, &bv, &ref[la.target_id]);
|
||||
la.PrintParseable(((matchingend == 0) ? aligns1stream : aligns2stream), &bv, &ref[la.target_id]);
|
||||
}
|
||||
}
|
||||
|
||||
aligns1stream.close();
|
||||
aligns2stream.close();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
CXX=g++
|
||||
CXXFLAGS=-g -Wall -O2 -m64 -fPIC
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -c $(CXXFLAGS) -I$(BWA_HOME) -I$(JAVA_INCLUDE) $< -o $@
|
||||
|
||||
all: init lib
|
||||
|
||||
init:
|
||||
@echo Please make sure the following platforms are set correctly on your machine.
|
||||
@echo BWA_HOME=$(BWA_HOME)
|
||||
@echo JAVA_INCLUDE=$(JAVA_INCLUDE)
|
||||
@echo TARGET_LIB=$(TARGET_LIB)
|
||||
@echo EXTRA_LIBS=$(EXTRA_LIBS)
|
||||
@echo LIBTOOL_COMMAND=$(LIBTOOL_COMMAND)
|
||||
|
||||
lib: org_broadinstitute_sting_alignment_bwa_c_BWACAligner.o bwa_gateway.o
|
||||
$(LIBTOOL_COMMAND) $? -o $(TARGET_LIB) -L$(BWA_HOME) -lbwacore $(EXTRA_LIBS)
|
||||
|
||||
clean:
|
||||
rm *.o libbwa.*
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/sh
|
||||
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa"
|
||||
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
|
||||
export TARGET_LIB="libbwa.so"
|
||||
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"
|
||||
export LIBTOOL_COMMAND="g++ -shared -Wl,-soname,libbwa.so"
|
||||
make
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/sh
|
||||
export BWA_HOME="/Users/mhanna/src/bwa"
|
||||
export JAVA_INCLUDE="/System/Library/Frameworks/JavaVM.framework/Headers"
|
||||
export TARGET_LIB="libbwa.dylib"
|
||||
export EXTRA_LIBS="-lc -lz -lsupc++"
|
||||
export LIBTOOL_COMMAND="libtool -dynamic"
|
||||
make
|
||||
|
|
@ -0,0 +1,268 @@
|
|||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "bwase.h"
|
||||
#include "bwa_gateway.h"
|
||||
|
||||
BWA::BWA(const char* ann_filename,
|
||||
const char* amb_filename,
|
||||
const char* pac_filename,
|
||||
const char* forward_bwt_filename,
|
||||
const char* forward_sa_filename,
|
||||
const char* reverse_bwt_filename,
|
||||
const char* reverse_sa_filename)
|
||||
{
|
||||
// Load the bns (?) and reference
|
||||
bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
|
||||
reference = new ubyte_t[bns->l_pac/4+1];
|
||||
rewind(bns->fp_pac);
|
||||
fread(reference, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||
fclose(bns->fp_pac);
|
||||
bns->fp_pac = NULL;
|
||||
|
||||
// Load the BWTs (both directions) and suffix arrays (both directions)
|
||||
bwts[0] = bwt_restore_bwt(forward_bwt_filename);
|
||||
bwt_restore_sa(forward_sa_filename, bwts[0]);
|
||||
bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
|
||||
bwt_restore_sa(reverse_sa_filename, bwts[1]);
|
||||
load_default_options();
|
||||
|
||||
// initialize the bwase subsystem
|
||||
bwase_initialize();
|
||||
}
|
||||
|
||||
BWA::~BWA() {
|
||||
delete[] reference;
|
||||
bns_destroy(bns);
|
||||
bwt_destroy(bwts[0]);
|
||||
bwt_destroy(bwts[1]);
|
||||
}
|
||||
|
||||
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence(bases, read_length);
|
||||
|
||||
// Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
|
||||
// This method will destroy the contents of seq and rseq.
|
||||
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||
|
||||
paths = new bwt_aln1_t[sequence->n_aln];
|
||||
memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
|
||||
num_paths = sequence->n_aln;
|
||||
|
||||
// Call aln2seq to initialize the type of match present.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
best_path_count = sequence->c1;
|
||||
second_best_path_count = sequence->c2;
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
||||
Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
|
||||
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||
|
||||
// Calculate paths.
|
||||
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||
|
||||
// Check for no alignments found and return null.
|
||||
if(sequence->n_aln == 0) {
|
||||
bwa_free_read_seq(1,sequence);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
|
||||
copy_bases_into_sequence(sequence,bases,read_length);
|
||||
|
||||
// Pick best alignment and propagate its information into the sequence.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
|
||||
// Generate the best alignment from the sequence.
|
||||
Alignment* alignment = new Alignment;
|
||||
*alignment = generate_final_alignment_from_sequence(sequence);
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
void BWA::generate_alignments_from_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t* paths,
|
||||
const unsigned num_paths,
|
||||
const unsigned best_count,
|
||||
const unsigned second_best_count,
|
||||
Alignment*& alignments,
|
||||
unsigned& num_alignments)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||
|
||||
sequence->aln = paths;
|
||||
sequence->n_aln = num_paths;
|
||||
|
||||
// (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
|
||||
// But overwrite key parts of the sequence in case the user passed back only a smaller subset
|
||||
// of the paths.
|
||||
sequence->c1 = best_count;
|
||||
sequence->c2 = second_best_count;
|
||||
sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
|
||||
|
||||
num_alignments = 0;
|
||||
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
|
||||
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
|
||||
|
||||
alignments = new Alignment[num_alignments];
|
||||
unsigned alignment_idx = 0;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
// Stub in a 'working' path, so that only the desired alignment is local-aligned.
|
||||
const bwt_aln1_t* path = paths + path_idx;
|
||||
bwt_aln1_t working_path = *path;
|
||||
|
||||
// Loop through all alignments, aligning each one individually.
|
||||
for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
|
||||
working_path.k = working_path.l = sa_idx;
|
||||
sequence->aln = &working_path;
|
||||
sequence->n_aln = 1;
|
||||
|
||||
sequence->sa = sa_idx;
|
||||
sequence->strand = path->a;
|
||||
sequence->score = path->score;
|
||||
|
||||
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
|
||||
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
|
||||
if(alignment_idx > 0)
|
||||
seq_reverse(sequence->len, sequence->seq, 0);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
*(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);
|
||||
|
||||
alignment_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
sequence->aln = NULL;
|
||||
sequence->n_aln = 0;
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
||||
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
|
||||
// Calculate the local coordinate and local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment alignment;
|
||||
|
||||
// Populate basic path info
|
||||
alignment.edit_distance = sequence->nm;
|
||||
alignment.num_mismatches = sequence->n_mm;
|
||||
alignment.num_gap_opens = sequence->n_gapo;
|
||||
alignment.num_gap_extensions = sequence->n_gape;
|
||||
alignment.num_best = sequence->c1;
|
||||
alignment.num_second_best = sequence->c2;
|
||||
|
||||
// Final alignment position.
|
||||
alignment.type = sequence->type;
|
||||
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence->strand;
|
||||
alignment.mapping_quality = sequence->mapQ;
|
||||
|
||||
// Cigar step.
|
||||
alignment.cigar = NULL;
|
||||
if(sequence->cigar) {
|
||||
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||
}
|
||||
alignment.n_cigar = sequence->n_cigar;
|
||||
|
||||
// MD tag with a better breakdown of differences in the cigar
|
||||
alignment.md = strdup(sequence->md);
|
||||
delete[] sequence->md;
|
||||
sequence->md = NULL;
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
void BWA::load_default_options()
|
||||
{
|
||||
options.s_mm = 3;
|
||||
options.s_gapo = 11;
|
||||
options.s_gape = 4;
|
||||
options.mode = 3;
|
||||
options.indel_end_skip = 5;
|
||||
options.max_del_occ = 10;
|
||||
options.max_entries = 2000000;
|
||||
options.fnr = 0.04;
|
||||
options.max_diff = -1;
|
||||
options.max_gapo = 1;
|
||||
options.max_gape = 6;
|
||||
options.max_seed_diff = 2;
|
||||
options.seed_len = 2147483647;
|
||||
options.n_threads = 1;
|
||||
options.max_top2 = 30;
|
||||
options.trim_qual = 0;
|
||||
}
|
||||
|
||||
void BWA::set_max_edit_distance(float edit_distance) {
|
||||
if(edit_distance > 0 && edit_distance < 1) {
|
||||
options.fnr = edit_distance;
|
||||
options.max_diff = -1;
|
||||
}
|
||||
else {
|
||||
options.fnr = -1.0;
|
||||
options.max_diff = (int)edit_distance;
|
||||
}
|
||||
}
|
||||
|
||||
void BWA::set_max_gap_opens(int max_gap_opens) { options.max_gapo = max_gap_opens; }
|
||||
void BWA::set_max_gap_extensions(int max_gap_extensions) { options.max_gape = max_gap_extensions; }
|
||||
void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_skip = indel_range; }
|
||||
void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
|
||||
void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
|
||||
void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
|
||||
|
||||
/**
|
||||
* Create a sequence with a set of reasonable initial defaults.
|
||||
* Will leave seq and rseq empty.
|
||||
*/
|
||||
bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length)
|
||||
{
|
||||
bwa_seq_t* sequence = new bwa_seq_t;
|
||||
|
||||
sequence->tid = -1;
|
||||
|
||||
sequence->name = 0;
|
||||
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
|
||||
sequence->qual = 0;
|
||||
sequence->aln = 0;
|
||||
sequence->md = 0;
|
||||
|
||||
sequence->cigar = NULL;
|
||||
sequence->n_cigar = 0;
|
||||
|
||||
sequence->multi = NULL;
|
||||
sequence->n_multi = 0;
|
||||
|
||||
return sequence;
|
||||
}
|
||||
|
||||
void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length)
|
||||
{
|
||||
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
|
||||
sequence->seq = new ubyte_t[read_length];
|
||||
sequence->rseq = new ubyte_t[read_length];
|
||||
for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]];
|
||||
memcpy(sequence->rseq,sequence->seq,read_length);
|
||||
|
||||
// BWA expects the read bases to arrive reversed.
|
||||
seq_reverse(read_length,sequence->seq,0);
|
||||
seq_reverse(read_length,sequence->rseq,1);
|
||||
|
||||
sequence->full_len = sequence->len = read_length;
|
||||
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
#ifndef BWA_GATEWAY
|
||||
#define BWA_GATEWAY
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "bwtaln.h"
|
||||
|
||||
class Alignment {
|
||||
public:
|
||||
uint32_t type;
|
||||
int contig;
|
||||
bwtint_t pos;
|
||||
bool negative_strand;
|
||||
uint32_t mapping_quality;
|
||||
|
||||
uint16_t *cigar;
|
||||
int n_cigar;
|
||||
|
||||
uint8_t num_mismatches;
|
||||
uint8_t num_gap_opens;
|
||||
uint8_t num_gap_extensions;
|
||||
uint16_t edit_distance;
|
||||
|
||||
uint32_t num_best;
|
||||
uint32_t num_second_best;
|
||||
|
||||
char* md;
|
||||
};
|
||||
|
||||
class BWA {
|
||||
private:
|
||||
bntseq_t *bns;
|
||||
ubyte_t* reference;
|
||||
bwt_t* bwts[2];
|
||||
gap_opt_t options;
|
||||
|
||||
void load_default_options();
|
||||
bwa_seq_t* create_sequence(const char* bases, const unsigned read_length);
|
||||
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
|
||||
Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence);
|
||||
|
||||
public:
|
||||
BWA(const char* ann_filename,
|
||||
const char* amb_filename,
|
||||
const char* pac_filename,
|
||||
const char* forward_bwt_filename,
|
||||
const char* forward_sa_filename,
|
||||
const char* reverse_bwt_filename,
|
||||
const char* reverse_sa_filename);
|
||||
~BWA();
|
||||
|
||||
// Parameterize the aligner.
|
||||
void set_max_edit_distance(float edit_distance);
|
||||
void set_max_gap_opens(int max_gap_opens);
|
||||
void set_max_gap_extensions(int max_gap_extensions);
|
||||
void set_disallow_indel_within_range(int indel_range);
|
||||
void set_mismatch_penalty(int penalty);
|
||||
void set_gap_open_penalty(int penalty);
|
||||
void set_gap_extension_penalty(int penalty);
|
||||
|
||||
// Perform the alignment
|
||||
Alignment* generate_single_alignment(const char* bases,
|
||||
const unsigned read_length);
|
||||
void find_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t*& paths,
|
||||
unsigned& num_paths,
|
||||
unsigned& best_path_count,
|
||||
unsigned& second_best_path_count);
|
||||
void generate_alignments_from_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t* paths,
|
||||
const unsigned num_paths,
|
||||
const unsigned best_count,
|
||||
const unsigned second_best_count,
|
||||
Alignment*& alignments,
|
||||
unsigned& num_alignments);
|
||||
};
|
||||
|
||||
#endif // BWA_GATEWAY
|
||||
Binary file not shown.
|
|
@ -0,0 +1,437 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "bwtaln.h"
|
||||
#include "bwa_gateway.h"
|
||||
#include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
|
||||
|
||||
typedef void (BWA::*int_setter)(int value);
|
||||
typedef void (BWA::*float_setter)(float value);
|
||||
|
||||
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
|
||||
{
|
||||
jstring java_ann = get_configuration_file(env,bwtFiles,"annFile");
|
||||
if(java_ann == NULL) return 0L;
|
||||
jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile");
|
||||
if(java_amb == NULL) return 0L;
|
||||
jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile");
|
||||
if(java_pac == NULL) return 0L;
|
||||
jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile");
|
||||
if(java_forward_bwt == NULL) return 0L;
|
||||
jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile");
|
||||
if(java_forward_sa == NULL) return 0L;
|
||||
jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile");
|
||||
if(java_reverse_bwt == NULL) return 0L;
|
||||
jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile");
|
||||
if(java_reverse_sa == NULL) return 0L;
|
||||
|
||||
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* amb_filename = env->GetStringUTFChars(java_amb,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* pac_filename = env->GetStringUTFChars(java_pac,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* forward_bwt_filename = env->GetStringUTFChars(java_forward_bwt,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* forward_sa_filename = env->GetStringUTFChars(java_forward_sa,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* reverse_bwt_filename = env->GetStringUTFChars(java_reverse_bwt,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
const char* reverse_sa_filename = env->GetStringUTFChars(java_reverse_sa,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
|
||||
BWA* bwa = new BWA(ann_filename,
|
||||
amb_filename,
|
||||
pac_filename,
|
||||
forward_bwt_filename,
|
||||
forward_sa_filename,
|
||||
reverse_bwt_filename,
|
||||
reverse_sa_filename);
|
||||
|
||||
Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
|
||||
env->ReleaseStringUTFChars(java_ann,ann_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_amb,amb_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_pac,pac_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_forward_bwt,forward_bwt_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_forward_sa,forward_sa_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_reverse_bwt,reverse_bwt_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
env->ReleaseStringUTFChars(java_reverse_sa,reverse_sa_filename);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
|
||||
return (jlong)bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy(JNIEnv* env, jobject instance, jlong java_bwa)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
delete bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) {
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty);
|
||||
if(env->ExceptionCheck()) return;
|
||||
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
|
||||
if(env->ExceptionCheck()) return;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
bwt_aln1_t* paths = NULL;
|
||||
unsigned num_paths = 0;
|
||||
|
||||
unsigned best_path_count, second_best_path_count;
|
||||
bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count);
|
||||
|
||||
jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL);
|
||||
if(java_paths == NULL) return NULL;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
bwt_aln1_t& path = *(paths + path_idx);
|
||||
|
||||
jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath");
|
||||
if(java_path_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_path_constructor = env->GetMethodID(java_path_class, "<init>", "(IIIZJJIII)V");
|
||||
if(java_path_constructor == NULL) return NULL;
|
||||
|
||||
// Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints.
|
||||
jobject java_path = env->NewObject(java_path_class,
|
||||
java_path_constructor,
|
||||
path.n_mm,
|
||||
path.n_gapo,
|
||||
path.n_gape,
|
||||
path.a,
|
||||
(jlong)path.k,
|
||||
(jlong)path.l,
|
||||
path.score,
|
||||
best_path_count,
|
||||
second_best_path_count);
|
||||
if(java_path == NULL) return NULL;
|
||||
|
||||
env->SetObjectArrayElement(java_paths,path_idx,java_path);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
env->DeleteLocalRef(java_path_class);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
delete[] paths;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
return env->ExceptionCheck() ? NULL : java_paths;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths)
|
||||
{
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
const jsize num_paths = env->GetArrayLength(java_paths);
|
||||
bwt_aln1_t* paths = new bwt_aln1_t[num_paths];
|
||||
unsigned best_count = 0, second_best_count = 0;
|
||||
|
||||
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
|
||||
jobject java_path = env->GetObjectArrayElement(java_paths,path_idx);
|
||||
jclass java_path_class = env->GetObjectClass(java_path);
|
||||
if(java_path_class == NULL) return NULL;
|
||||
|
||||
bwt_aln1_t& path = *(paths + path_idx);
|
||||
|
||||
jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I");
|
||||
if(mismatches_field == NULL) return NULL;
|
||||
path.n_mm = env->GetIntField(java_path,mismatches_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I");
|
||||
if(gap_opens_field == NULL) return NULL;
|
||||
path.n_gapo = env->GetIntField(java_path,gap_opens_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I");
|
||||
if(gap_extensions_field == NULL) return NULL;
|
||||
path.n_gape = env->GetIntField(java_path,gap_extensions_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z");
|
||||
if(negative_strand_field == NULL) return NULL;
|
||||
path.a = env->GetBooleanField(java_path,negative_strand_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID k_field = env->GetFieldID(java_path_class, "k", "J");
|
||||
if(k_field == NULL) return NULL;
|
||||
path.k = env->GetLongField(java_path,k_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID l_field = env->GetFieldID(java_path_class, "l", "J");
|
||||
if(l_field == NULL) return NULL;
|
||||
path.l = env->GetLongField(java_path,l_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID score_field = env->GetFieldID(java_path_class, "score", "I");
|
||||
if(score_field == NULL) return NULL;
|
||||
path.score = env->GetIntField(java_path,score_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I");
|
||||
if(best_count_field == NULL) return NULL;
|
||||
best_count = env->GetIntField(java_path,best_count_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I");
|
||||
if(second_best_count_field == NULL) return NULL;
|
||||
second_best_count = env->GetIntField(java_path,second_best_count_field);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
Alignment* alignments = NULL;
|
||||
unsigned num_alignments = 0;
|
||||
bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments);
|
||||
|
||||
jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
|
||||
if(java_alignments == NULL) return NULL;
|
||||
|
||||
for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) {
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
delete[] alignments;
|
||||
delete[] paths;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
return env->ExceptionCheck() ? NULL : java_alignments;
|
||||
}
|
||||
|
||||
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) {
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
|
||||
jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL;
|
||||
delete best_alignment;
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
return java_best_alignment;
|
||||
}
|
||||
|
||||
static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) {
|
||||
unsigned cigar_length;
|
||||
if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
|
||||
else if(!alignment.cigar) cigar_length = 1;
|
||||
else cigar_length = alignment.n_cigar;
|
||||
|
||||
jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
|
||||
if(java_cigar_operators == NULL) return NULL;
|
||||
jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
|
||||
if(java_cigar_lengths == NULL) return NULL;
|
||||
|
||||
if(alignment.cigar) {
|
||||
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
delete[] alignment.cigar;
|
||||
|
||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
if(java_alignment_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IILjava/lang/String;IIIII)V");
|
||||
if(java_alignment_constructor == NULL) return NULL;
|
||||
|
||||
jstring java_md = env->NewStringUTF(alignment.md);
|
||||
if(java_md == NULL) return NULL;
|
||||
delete[] alignment.md;
|
||||
|
||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapping_quality,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths,
|
||||
alignment.edit_distance,
|
||||
java_md,
|
||||
alignment.num_mismatches,
|
||||
alignment.num_gap_opens,
|
||||
alignment.num_gap_extensions,
|
||||
alignment.num_best,
|
||||
alignment.num_second_best);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
|
||||
env->DeleteLocalRef(java_alignment_class);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
return java_alignment;
|
||||
}
|
||||
|
||||
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||
jclass configuration_class = env->GetObjectClass(configuration);
|
||||
if(configuration_class == NULL) return NULL;
|
||||
|
||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;");
|
||||
if(configuration_field == NULL) return NULL;
|
||||
|
||||
jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field);
|
||||
|
||||
jclass file_class = env->FindClass("java/io/File");
|
||||
if(file_class == NULL) return NULL;
|
||||
|
||||
jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;");
|
||||
if(path_extractor == NULL) return NULL;
|
||||
|
||||
jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor);
|
||||
if(path == NULL) return NULL;
|
||||
|
||||
env->DeleteLocalRef(configuration_class);
|
||||
env->DeleteLocalRef(file_class);
|
||||
env->DeleteLocalRef(configuration_file);
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
||||
jclass configuration_class = env->GetObjectClass(configuration);
|
||||
if(configuration_class == NULL) return;
|
||||
|
||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Integer;");
|
||||
if(configuration_field == NULL) return;
|
||||
|
||||
jobject boxed_value = env->GetObjectField(configuration,configuration_field);
|
||||
if(env->ExceptionCheck()) return;
|
||||
|
||||
if(boxed_value != NULL) {
|
||||
jclass int_box_class = env->FindClass("java/lang/Integer");
|
||||
if(int_box_class == NULL) return;
|
||||
|
||||
jmethodID int_extractor = env->GetMethodID(int_box_class,"intValue", "()I");
|
||||
if(int_extractor == NULL) return;
|
||||
|
||||
jint value = env->CallIntMethod(boxed_value,int_extractor);
|
||||
if(env->ExceptionCheck()) return;
|
||||
|
||||
if(value < 0)
|
||||
{
|
||||
throw_config_value_exception(env,field_name,"cannot be set to a negative value");
|
||||
return;
|
||||
}
|
||||
|
||||
(bwa->*setter)(value);
|
||||
|
||||
env->DeleteLocalRef(int_box_class);
|
||||
}
|
||||
|
||||
env->DeleteLocalRef(boxed_value);
|
||||
env->DeleteLocalRef(configuration_class);
|
||||
}
|
||||
|
||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter)
|
||||
{
|
||||
jclass configuration_class = env->GetObjectClass(configuration);
|
||||
if(configuration_class == NULL) return;
|
||||
|
||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Float;");
|
||||
if(configuration_field == NULL) return;
|
||||
|
||||
jobject boxed_value = env->GetObjectField(configuration,configuration_field);
|
||||
if(boxed_value != NULL) {
|
||||
jclass float_box_class = env->FindClass("java/lang/Float");
|
||||
if(float_box_class == NULL) return;
|
||||
|
||||
jmethodID float_extractor = env->GetMethodID(float_box_class,"floatValue", "()F");
|
||||
if(float_extractor == NULL) return;
|
||||
|
||||
jfloat value = env->CallFloatMethod(boxed_value,float_extractor);
|
||||
if(env->ExceptionCheck()) return;
|
||||
|
||||
if(value < 0)
|
||||
{
|
||||
throw_config_value_exception(env,field_name,"cannot be set to a negative value");
|
||||
return;
|
||||
}
|
||||
|
||||
(bwa->*setter)(value);
|
||||
|
||||
env->DeleteLocalRef(float_box_class);
|
||||
}
|
||||
|
||||
env->DeleteLocalRef(boxed_value);
|
||||
env->DeleteLocalRef(configuration_class);
|
||||
}
|
||||
|
||||
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message)
|
||||
{
|
||||
char* buffer = new char[strlen(field_name)+1+strlen(message)+1];
|
||||
sprintf(buffer,"%s %s",field_name,message);
|
||||
jclass sting_exception_class = env->FindClass("org/broadinstitute/sting/utils/StingException");
|
||||
if(sting_exception_class == NULL) return;
|
||||
env->ThrowNew(sting_exception_class, buffer);
|
||||
delete[] buffer;
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class org_broadinstitute_sting_alignment_bwa_c_BWACAligner */
|
||||
|
||||
#ifndef _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
#define _Included_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: create
|
||||
* Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J
|
||||
*/
|
||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create
|
||||
(JNIEnv *, jobject, jobject, jobject);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: updateConfiguration
|
||||
* Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration
|
||||
(JNIEnv *, jobject, jlong, jobject);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: destroy
|
||||
* Signature: (J)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_destroy
|
||||
(JNIEnv *, jobject, jlong);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: getPaths
|
||||
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: convertPathsToAlignments
|
||||
* Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
|
||||
(JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: getBestAlignment
|
||||
* Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
CC=gcc
|
||||
CCFLAGS=-Wall -dynamiclib -arch i386 -arch x86_64
|
||||
|
||||
libenvironhack.dylib: libenvironhack.c
|
||||
$(CC) $(CCFLAGS) -init _init_environ $< -o $@
|
||||
|
||||
all: libenvironhack.dylib
|
||||
|
||||
clean:
|
||||
rm -f libenvironhack.dylib
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
LSF 7.0.6 on the mac is missing the unsatisfied exported symbol for environ which was removed on MacOS X 10.5+.
|
||||
nm $LSF_LIBDIR/liblsf.dylib | grep environ
|
||||
See "man environ" for more info, along with http://lists.apple.com/archives/java-dev/2007/Dec/msg00096.html
|
||||
*/
|
||||
|
||||
#include <crt_externs.h>
|
||||
|
||||
char **environ = (char **)0;
|
||||
|
||||
void init_environ(void) {
|
||||
environ = (*_NSGetEnviron());
|
||||
}
|
||||
Binary file not shown.
|
|
@ -0,0 +1,52 @@
|
|||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Create perfect alignments from the read to the genome represented by the given BWT / suffix array.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public interface Aligner {
|
||||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
public void close();
|
||||
|
||||
/**
|
||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||
* @param bases Bases to align.
|
||||
* @return An align
|
||||
*/
|
||||
public Alignment getBestAlignment(final byte[] bases);
|
||||
|
||||
/**
|
||||
* Align the read to the reference.
|
||||
* @param read Read to align.
|
||||
* @param header Optional header to drop in place.
|
||||
* @return A list of the alignments.
|
||||
*/
|
||||
public SAMRecord align(final SAMRecord read, final SAMFileHeader header);
|
||||
|
||||
/**
|
||||
* Get a iterator of alignments, batched by mapping quality.
|
||||
* @param bases List of bases.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterable<Alignment[]> getAllAlignments(final byte[] bases);
|
||||
|
||||
/**
|
||||
* Get a iterator of aligned reads, batched by mapping quality.
|
||||
* @param read Read to align.
|
||||
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Represents an alignment of a read to a site in the reference genome.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class Alignment {
|
||||
protected int contigIndex;
|
||||
protected long alignmentStart;
|
||||
protected boolean negativeStrand;
|
||||
protected int mappingQuality;
|
||||
|
||||
protected char[] cigarOperators;
|
||||
protected int[] cigarLengths;
|
||||
|
||||
protected int editDistance;
|
||||
protected String mismatchingPositions;
|
||||
|
||||
protected int numMismatches;
|
||||
protected int numGapOpens;
|
||||
protected int numGapExtensions;
|
||||
protected int bestCount;
|
||||
protected int secondBestCount;
|
||||
|
||||
/**
|
||||
* Gets the index of the given contig.
|
||||
* @return the inde
|
||||
*/
|
||||
public int getContigIndex() { return contigIndex; }
|
||||
|
||||
/**
|
||||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public long getAlignmentStart() { return alignmentStart; }
|
||||
|
||||
/**
|
||||
* Is the given alignment on the reverse strand?
|
||||
* @return True if the alignment is on the reverse strand.
|
||||
*/
|
||||
public boolean isNegativeStrand() { return negativeStrand; }
|
||||
|
||||
/**
|
||||
* Gets the score of this alignment.
|
||||
* @return The score.
|
||||
*/
|
||||
public int getMappingQuality() { return mappingQuality; }
|
||||
|
||||
/**
|
||||
* Gets the edit distance; will eventually end up in the NM SAM tag
|
||||
* if this alignment makes it that far.
|
||||
* @return The edit distance.
|
||||
*/
|
||||
public int getEditDistance() { return editDistance; }
|
||||
|
||||
/**
|
||||
* A string representation of which positions mismatch; contents of MD tag.
|
||||
* @return String representation of mismatching positions.
|
||||
*/
|
||||
public String getMismatchingPositions() { return mismatchingPositions; }
|
||||
|
||||
/**
|
||||
* Gets the number of mismatches in the read.
|
||||
* @return Number of mismatches.
|
||||
*/
|
||||
public int getNumMismatches() { return numMismatches; }
|
||||
|
||||
/**
|
||||
* Get the number of gap opens.
|
||||
* @return Number of gap opens.
|
||||
*/
|
||||
public int getNumGapOpens() { return numGapOpens; }
|
||||
|
||||
/**
|
||||
* Get the number of gap extensions.
|
||||
* @return Number of gap extensions.
|
||||
*/
|
||||
public int getNumGapExtensions() { return numGapExtensions; }
|
||||
|
||||
/**
|
||||
* Get the number of best alignments.
|
||||
* @return Number of top scoring alignments.
|
||||
*/
|
||||
public int getBestCount() { return bestCount; }
|
||||
|
||||
/**
|
||||
* Get the number of second best alignments.
|
||||
* @return Number of second best scoring alignments.
|
||||
*/
|
||||
public int getSecondBestCount() { return secondBestCount; }
|
||||
|
||||
/**
|
||||
* Gets the cigar for this alignment.
|
||||
* @return sam-jdk formatted alignment.
|
||||
*/
|
||||
public Cigar getCigar() {
|
||||
Cigar cigar = new Cigar();
|
||||
for(int i = 0; i < cigarOperators.length; i++) {
|
||||
CigarOperator operator = CigarOperator.characterToEnum(cigarOperators[i]);
|
||||
cigar.add(new CigarElement(cigarLengths[i],operator));
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Temporarily implement getCigarString() for debugging; the TextCigarCodec is unfortunately
|
||||
* package-protected.
|
||||
* @return
|
||||
*/
|
||||
public String getCigarString() {
|
||||
Cigar cigar = getCigar();
|
||||
if(cigar.isEmpty()) return "*";
|
||||
|
||||
StringBuilder cigarString = new StringBuilder();
|
||||
for(CigarElement element: cigar.getCigarElements()) {
|
||||
cigarString.append(element.getLength());
|
||||
cigarString.append(element.getOperator());
|
||||
}
|
||||
return cigarString.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stub for inheritance.
|
||||
*/
|
||||
public Alignment() {}
|
||||
|
||||
/**
|
||||
* Create a new alignment object.
|
||||
* @param contigIndex The contig to which this read aligned.
|
||||
* @param alignmentStart The point within the contig to which this read aligned.
|
||||
* @param negativeStrand Forward or reverse alignment of the given read.
|
||||
* @param mappingQuality How good does BWA think this mapping is?
|
||||
* @param cigarOperators The ordered operators in the cigar string.
|
||||
* @param cigarLengths The lengths to which each operator applies.
|
||||
* @param editDistance The edit distance (cumulative) of the read.
|
||||
* @param mismatchingPositions String representation of which bases in the read mismatch.
|
||||
* @param numMismatches Number of total mismatches in the read.
|
||||
* @param numGapOpens Number of gap opens in the read.
|
||||
* @param numGapExtensions Number of gap extensions in the read.
|
||||
* @param bestCount Number of best alignments in the read.
|
||||
* @param secondBestCount Number of second best alignments in the read.
|
||||
*/
|
||||
public Alignment(int contigIndex,
|
||||
int alignmentStart,
|
||||
boolean negativeStrand,
|
||||
int mappingQuality,
|
||||
char[] cigarOperators,
|
||||
int[] cigarLengths,
|
||||
int editDistance,
|
||||
String mismatchingPositions,
|
||||
int numMismatches,
|
||||
int numGapOpens,
|
||||
int numGapExtensions,
|
||||
int bestCount,
|
||||
int secondBestCount) {
|
||||
this.contigIndex = contigIndex;
|
||||
this.alignmentStart = alignmentStart;
|
||||
this.negativeStrand = negativeStrand;
|
||||
this.mappingQuality = mappingQuality;
|
||||
this.cigarOperators = cigarOperators;
|
||||
this.cigarLengths = cigarLengths;
|
||||
this.editDistance = editDistance;
|
||||
this.mismatchingPositions = mismatchingPositions;
|
||||
this.numMismatches = numMismatches;
|
||||
this.numGapOpens = numGapOpens;
|
||||
this.numGapExtensions = numGapExtensions;
|
||||
this.bestCount = bestCount;
|
||||
this.secondBestCount = secondBestCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a read directly from an alignment.
|
||||
* @param alignment The alignment to convert to a read.
|
||||
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
|
||||
* @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence
|
||||
* dictionary in the
|
||||
* @return A mapped alignment.
|
||||
*/
|
||||
public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) {
|
||||
SAMRecord read;
|
||||
try {
|
||||
read = (SAMRecord)unmappedRead.clone();
|
||||
}
|
||||
catch(CloneNotSupportedException ex) {
|
||||
throw new ReviewedStingException("Unable to create aligned read from template.");
|
||||
}
|
||||
|
||||
if(newSAMHeader != null)
|
||||
read.setHeader(newSAMHeader);
|
||||
|
||||
// If we're realigning a previously aligned record, strip out the placement of the alignment.
|
||||
read.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
|
||||
read.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||
read.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
|
||||
read.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||
|
||||
if(alignment != null) {
|
||||
read.setReadUnmappedFlag(false);
|
||||
read.setReferenceIndex(alignment.getContigIndex());
|
||||
read.setAlignmentStart((int)alignment.getAlignmentStart());
|
||||
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
|
||||
read.setMappingQuality(alignment.getMappingQuality());
|
||||
read.setCigar(alignment.getCigar());
|
||||
if(alignment.isNegativeStrand()) {
|
||||
read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases()));
|
||||
read.setBaseQualities(Utils.reverse(read.getBaseQualities()));
|
||||
}
|
||||
read.setAttribute("NM",alignment.getEditDistance());
|
||||
read.setAttribute("MD",alignment.getMismatchingPositions());
|
||||
}
|
||||
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Validates consistency of the aligner interface by taking reads already aligned by BWA in a BAM file, stripping them
|
||||
* of their alignment data, realigning them, and making sure one of the best resulting realignments matches the original
|
||||
* alignment from the input file.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
*/
|
||||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
private String prefix = null;
|
||||
|
||||
/**
|
||||
* The instance used to generate alignments.
|
||||
*/
|
||||
private BWACAligner aligner = null;
|
||||
|
||||
/**
|
||||
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||
*/
|
||||
@Override
|
||||
public void initialize() {
|
||||
if(prefix == null)
|
||||
prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
|
||||
BWTFiles bwtFiles = new BWTFiles(prefix);
|
||||
BWAConfiguration configuration = new BWAConfiguration();
|
||||
aligner = new BWACAligner(bwtFiles,configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of reads aligned by this map (aka 1).
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
//logger.info(String.format("examining read %s", read.getReadName()));
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
boolean matches = true;
|
||||
Iterable<Alignment[]> alignments = aligner.getAllAlignments(bases);
|
||||
Iterator<Alignment[]> alignmentIterator = alignments.iterator();
|
||||
|
||||
if(!alignmentIterator.hasNext()) {
|
||||
matches = read.getReadUnmappedFlag();
|
||||
}
|
||||
else {
|
||||
Alignment[] alignmentsOfBestQuality = alignmentIterator.next();
|
||||
for(Alignment alignment: alignmentsOfBestQuality) {
|
||||
matches = (alignment.getContigIndex() == read.getReferenceIndex());
|
||||
matches &= (alignment.getAlignmentStart() == read.getAlignmentStart());
|
||||
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
|
||||
matches &= (alignment.getCigar().equals(read.getCigar()));
|
||||
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
|
||||
if(matches) break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!matches) {
|
||||
logger.error("Found mismatch!");
|
||||
logger.error(String.format("Read %s:",read.getReadName()));
|
||||
logger.error(String.format(" Contig index: %d",read.getReferenceIndex()));
|
||||
logger.error(String.format(" Alignment start: %d", read.getAlignmentStart()));
|
||||
logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
|
||||
logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
|
||||
logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
|
||||
for(Alignment[] alignmentsByScore: alignments) {
|
||||
for(int i = 0; i < alignmentsByScore.length; i++) {
|
||||
logger.error(String.format("Alignment %d:",i));
|
||||
logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex()));
|
||||
logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart()));
|
||||
logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand()));
|
||||
logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString()));
|
||||
logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality()));
|
||||
}
|
||||
}
|
||||
throw new ReviewedStingException(String.format("Read %s mismatches!", read.getReadName()));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initial value for reduce. In this case, validated reads will be counted.
|
||||
* @return 0, indicating no reads yet validated.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
/**
|
||||
* Calculates the number of reads processed.
|
||||
* @param value Number of reads processed by this map.
|
||||
* @param sum Number of reads processed before this map.
|
||||
* @return Number of reads processed up to and including this map.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup.
|
||||
* @param result Number of reads processed.
|
||||
*/
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Aligns reads to a given reference using Heng Li's BWA aligner, presenting the resulting alignments in SAM or BAM format.
|
||||
* Mimics the steps 'bwa aln' followed by 'bwa samse' using the BWA/C implementation.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@WalkerName("Align")
|
||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
||||
"generated by bwa index -d bwtsw. If unspecified, will default " +
|
||||
"to the reference specified via the -R argument.",required=false)
|
||||
private File targetReferenceFile = null;
|
||||
|
||||
@Output
|
||||
private StingSAMFileWriter out = null;
|
||||
|
||||
/**
|
||||
* The actual aligner.
|
||||
*/
|
||||
private BWACAligner aligner = null;
|
||||
|
||||
/**
|
||||
* New header to use, if desired.
|
||||
*/
|
||||
private SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||
*/
|
||||
@Override
|
||||
public void initialize() {
|
||||
if(targetReferenceFile == null)
|
||||
targetReferenceFile = getToolkit().getArguments().referenceFile;
|
||||
BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath());
|
||||
BWAConfiguration configuration = new BWAConfiguration();
|
||||
aligner = new BWACAligner(bwtFiles,configuration);
|
||||
|
||||
// Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
|
||||
header = getToolkit().getSAMFileHeader().clone();
|
||||
SAMSequenceDictionary referenceDictionary =
|
||||
ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary();
|
||||
header.setSequenceDictionary(referenceDictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
|
||||
out.writeHeader(header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
SAMRecord alignedRead = aligner.align(read,header);
|
||||
out.addAlignment(alignedRead);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initial value for reduce. In this case, alignments will be counted.
|
||||
* @return 0, indicating no alignments yet found.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
/**
|
||||
* Calculates the number of alignments found.
|
||||
* @param value Number of alignments found by this map.
|
||||
* @param sum Number of alignments found before this map.
|
||||
* @return Number of alignments found up to and including this map.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup.
|
||||
* @param result Number of reads processed.
|
||||
*/
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Counts the number of best alignments as presented by BWA and outputs a histogram of number of placements vs. the
|
||||
* frequency of that number of placements.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
*/
|
||||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
private String prefix = null;
|
||||
|
||||
@Output
|
||||
private PrintStream out = null;
|
||||
|
||||
/**
|
||||
* The actual aligner.
|
||||
*/
|
||||
private Aligner aligner = null;
|
||||
|
||||
private SortedMap<Integer,Integer> alignmentFrequencies = new TreeMap<Integer,Integer>();
|
||||
|
||||
/**
|
||||
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||
*/
|
||||
@Override
|
||||
public void initialize() {
|
||||
if(prefix == null)
|
||||
prefix = getToolkit().getArguments().referenceFile.getAbsolutePath();
|
||||
BWTFiles bwtFiles = new BWTFiles(prefix);
|
||||
BWAConfiguration configuration = new BWAConfiguration();
|
||||
aligner = new BWACAligner(bwtFiles,configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
|
||||
if(alignmentIterator.hasNext()) {
|
||||
int numAlignments = alignmentIterator.next().length;
|
||||
if(alignmentFrequencies.containsKey(numAlignments))
|
||||
alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1);
|
||||
else
|
||||
alignmentFrequencies.put(numAlignments,1);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initial value for reduce. In this case, validated reads will be counted.
|
||||
* @return 0, indicating no reads yet validated.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
/**
|
||||
* Calculates the number of reads processed.
|
||||
* @param value Number of reads processed by this map.
|
||||
* @param sum Number of reads processed before this map.
|
||||
* @return Number of reads processed up to and including this map.
|
||||
*/
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup.
|
||||
* @param result Number of reads processed.
|
||||
*/
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
for(Map.Entry<Integer,Integer> alignmentFrequency: alignmentFrequencies.entrySet())
|
||||
out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue());
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.alignment.Aligner;
|
||||
|
||||
/**
|
||||
* Align reads using BWA.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public abstract class BWAAligner implements Aligner {
|
||||
/**
|
||||
* The supporting files used by BWA.
|
||||
*/
|
||||
protected BWTFiles bwtFiles;
|
||||
|
||||
/**
|
||||
* The current configuration for the BWA aligner.
|
||||
*/
|
||||
protected BWAConfiguration configuration;
|
||||
|
||||
/**
|
||||
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
|
||||
* parameters.
|
||||
* @param bwtFiles The many files representing BWTs persisted to disk.
|
||||
* @param configuration Configuration parameters for the alignment.
|
||||
*/
|
||||
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||
this.bwtFiles = bwtFiles;
|
||||
this.configuration = configuration;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the configuration passed to the BWA aligner.
|
||||
* @param configuration New configuration to set.
|
||||
*/
|
||||
public abstract void updateConfiguration(BWAConfiguration configuration);
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
/**
|
||||
* Configuration for the BWA/C aligner.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAConfiguration {
|
||||
/**
|
||||
* The maximum edit distance used by BWA.
|
||||
*/
|
||||
public Float maximumEditDistance = null;
|
||||
|
||||
/**
|
||||
* How many gap opens are acceptable within this alignment?
|
||||
*/
|
||||
public Integer maximumGapOpens = null;
|
||||
|
||||
/**
|
||||
* How many gap extensions are acceptable within this alignment?
|
||||
*/
|
||||
public Integer maximumGapExtensions = null;
|
||||
|
||||
/**
|
||||
* Do we disallow indels within a certain range from the start / end?
|
||||
*/
|
||||
public Integer disallowIndelWithinRange = null;
|
||||
|
||||
/**
|
||||
* What is the scoring penalty for a mismatch?
|
||||
*/
|
||||
public Integer mismatchPenalty = null;
|
||||
|
||||
/**
|
||||
* What is the scoring penalty for a gap open?
|
||||
*/
|
||||
public Integer gapOpenPenalty = null;
|
||||
|
||||
/**
|
||||
* What is the scoring penalty for a gap extension?
|
||||
*/
|
||||
public Integer gapExtensionPenalty = null;
|
||||
}
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArray;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArrayWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.ANNWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.AMBWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Support files for BWT.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWTFiles {
|
||||
/**
|
||||
* ANN (?) file name.
|
||||
*/
|
||||
public final File annFile;
|
||||
|
||||
/**
|
||||
* AMB (?) file name.
|
||||
*/
|
||||
public final File ambFile;
|
||||
|
||||
/**
|
||||
* Packed reference sequence file.
|
||||
*/
|
||||
public final File pacFile;
|
||||
|
||||
/**
|
||||
* Reverse of packed reference sequence file.
|
||||
*/
|
||||
public final File rpacFile;
|
||||
|
||||
/**
|
||||
* Forward BWT file.
|
||||
*/
|
||||
public final File forwardBWTFile;
|
||||
|
||||
/**
|
||||
* Forward suffix array file.
|
||||
*/
|
||||
public final File forwardSAFile;
|
||||
|
||||
/**
|
||||
* Reverse BWT file.
|
||||
*/
|
||||
public final File reverseBWTFile;
|
||||
|
||||
/**
|
||||
* Reverse suffix array file.
|
||||
*/
|
||||
public final File reverseSAFile;
|
||||
|
||||
/**
|
||||
* Where these files autogenerated on the fly?
|
||||
*/
|
||||
public final boolean autogenerated;
|
||||
|
||||
/**
|
||||
* Create a new BWA configuration file using the given prefix.
|
||||
* @param prefix Prefix to use when creating the configuration. Must not be null.
|
||||
*/
|
||||
public BWTFiles(String prefix) {
|
||||
if(prefix == null)
|
||||
throw new ReviewedStingException("Prefix must not be null.");
|
||||
annFile = new File(prefix + ".ann");
|
||||
ambFile = new File(prefix + ".amb");
|
||||
pacFile = new File(prefix + ".pac");
|
||||
rpacFile = new File(prefix + ".rpac");
|
||||
forwardBWTFile = new File(prefix + ".bwt");
|
||||
forwardSAFile = new File(prefix + ".sa");
|
||||
reverseBWTFile = new File(prefix + ".rbwt");
|
||||
reverseSAFile = new File(prefix + ".rsa");
|
||||
autogenerated = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hand-create a new BWTFiles object, specifying a unique file object for each type.
|
||||
* @param annFile ANN (alternate dictionary) file.
|
||||
* @param ambFile AMB (holes) files.
|
||||
* @param pacFile Packed representation of the forward reference sequence.
|
||||
* @param forwardBWTFile BWT representation of the forward reference sequence.
|
||||
* @param forwardSAFile SA representation of the forward reference sequence.
|
||||
* @param rpacFile Packed representation of the reversed reference sequence.
|
||||
* @param reverseBWTFile BWT representation of the reversed reference sequence.
|
||||
* @param reverseSAFile SA representation of the reversed reference sequence.
|
||||
*/
|
||||
private BWTFiles(File annFile,
|
||||
File ambFile,
|
||||
File pacFile,
|
||||
File forwardBWTFile,
|
||||
File forwardSAFile,
|
||||
File rpacFile,
|
||||
File reverseBWTFile,
|
||||
File reverseSAFile) {
|
||||
this.annFile = annFile;
|
||||
this.ambFile = ambFile;
|
||||
this.pacFile = pacFile;
|
||||
this.forwardBWTFile = forwardBWTFile;
|
||||
this.forwardSAFile = forwardSAFile;
|
||||
this.rpacFile = rpacFile;
|
||||
this.reverseBWTFile = reverseBWTFile;
|
||||
this.reverseSAFile = reverseSAFile;
|
||||
autogenerated = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close out this files object, in the process deleting any temporary filse
|
||||
* that were created.
|
||||
*/
|
||||
public void close() {
|
||||
if(autogenerated) {
|
||||
boolean success = true;
|
||||
success = annFile.delete();
|
||||
success &= ambFile.delete();
|
||||
success &= pacFile.delete();
|
||||
success &= forwardBWTFile.delete();
|
||||
success &= forwardSAFile.delete();
|
||||
success &= rpacFile.delete();
|
||||
success &= reverseBWTFile.delete();
|
||||
success &= reverseSAFile.delete();
|
||||
|
||||
if(!success)
|
||||
throw new ReviewedStingException("Unable to clean up autogenerated representation");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new set of BWT files from the given reference sequence.
|
||||
* @param referenceSequence Sequence from which to build metadata.
|
||||
* @return A new object representing encoded representations of each sequence.
|
||||
*/
|
||||
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||
byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
|
||||
System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
|
||||
normalizeReferenceSequence(normalizedReferenceSequence);
|
||||
|
||||
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||
try {
|
||||
// Write the ann and amb for this reference sequence.
|
||||
annFile = File.createTempFile("bwt",".ann");
|
||||
ambFile = File.createTempFile("bwt",".amb");
|
||||
|
||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||
dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));
|
||||
|
||||
ANNWriter annWriter = new ANNWriter(annFile);
|
||||
annWriter.write(dictionary);
|
||||
annWriter.close();
|
||||
|
||||
AMBWriter ambWriter = new AMBWriter(ambFile);
|
||||
ambWriter.writeEmpty(dictionary);
|
||||
ambWriter.close();
|
||||
|
||||
// Write the encoded files for the forward version of this reference sequence.
|
||||
pacFile = File.createTempFile("bwt",".pac");
|
||||
bwtFile = File.createTempFile("bwt",".bwt");
|
||||
saFile = File.createTempFile("bwt",".sa");
|
||||
|
||||
writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
|
||||
|
||||
// Write the encoded files for the reverse version of this reference sequence.
|
||||
byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence);
|
||||
|
||||
rpacFile = File.createTempFile("bwt",".rpac");
|
||||
rbwtFile = File.createTempFile("bwt",".rbwt");
|
||||
rsaFile = File.createTempFile("bwt",".rsa");
|
||||
|
||||
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to write autogenerated reference sequence to temporary files");
|
||||
}
|
||||
|
||||
// Make sure that, at the very least, all temporary files are deleted on exit.
|
||||
annFile.deleteOnExit();
|
||||
ambFile.deleteOnExit();
|
||||
pacFile.deleteOnExit();
|
||||
bwtFile.deleteOnExit();
|
||||
saFile.deleteOnExit();
|
||||
rpacFile.deleteOnExit();
|
||||
rbwtFile.deleteOnExit();
|
||||
rsaFile.deleteOnExit();
|
||||
|
||||
return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
|
||||
* sequence is the reference itself in PAC format, the BWT, and the suffix array.
|
||||
* @param referenceSequence The reference sequence to encode.
|
||||
* @param pacFile Target for the PAC-encoded reference.
|
||||
* @param bwtFile Target for the BWT representation of the reference.
|
||||
* @param suffixArrayFile Target for the suffix array encoding of the reference.
|
||||
* @throws java.io.IOException In case of issues writing to the file.
|
||||
*/
|
||||
private static void writeEncodedReferenceSequence(byte[] referenceSequence,
|
||||
File pacFile,
|
||||
File bwtFile,
|
||||
File suffixArrayFile) throws IOException {
|
||||
PackUtils.writeReferenceSequence(pacFile,referenceSequence);
|
||||
|
||||
BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
|
||||
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||
bwtWriter.write(bwt);
|
||||
bwtWriter.close();
|
||||
|
||||
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
||||
suffixArrayWriter.write(suffixArray);
|
||||
suffixArrayWriter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the given reference sequence into a form suitable for building into
|
||||
* on-the-fly sequences.
|
||||
* @param referenceSequence The reference sequence to normalize.
|
||||
* @throws org.broadinstitute.sting.utils.exceptions.ReviewedStingException if normalized sequence cannot be generated.
|
||||
*/
|
||||
private static void normalizeReferenceSequence(byte[] referenceSequence) {
|
||||
StringUtil.toUpperCase(referenceSequence);
|
||||
for(byte base: referenceSequence) {
|
||||
if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
|
||||
throw new ReviewedStingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.c;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An aligner using the BWA/C implementation.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWACAligner extends BWAAligner {
|
||||
static {
|
||||
System.loadLibrary("bwa");
|
||||
}
|
||||
|
||||
/**
|
||||
* A pointer to the C++ object representing the BWA engine.
|
||||
*/
|
||||
private long thunkPointer = 0;
|
||||
|
||||
public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||
super(bwtFiles,configuration);
|
||||
if(thunkPointer != 0)
|
||||
throw new ReviewedStingException("BWA/C attempting to reinitialize.");
|
||||
|
||||
if(!bwtFiles.annFile.exists()) throw new ReviewedStingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.ambFile.exists()) throw new ReviewedStingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.pacFile.exists()) throw new ReviewedStingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.forwardBWTFile.exists()) throw new ReviewedStingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.forwardSAFile.exists()) throw new ReviewedStingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.reverseBWTFile.exists()) throw new ReviewedStingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.reverseSAFile.exists()) throw new ReviewedStingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
|
||||
thunkPointer = create(bwtFiles,configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an aligner object using an array of bytes as a reference.
|
||||
* @param referenceSequence Reference sequence to encode ad-hoc.
|
||||
* @param configuration Configuration for the given aligner.
|
||||
*/
|
||||
public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) {
|
||||
this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration);
|
||||
// Now that the temporary files are created, the temporary files can be destroyed.
|
||||
bwtFiles.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the configuration passed to the BWA aligner.
|
||||
* @param configuration New configuration to set.
|
||||
*/
|
||||
@Override
|
||||
public void updateConfiguration(BWAConfiguration configuration) {
|
||||
if(thunkPointer == 0)
|
||||
throw new ReviewedStingException("BWA/C: attempting to update configuration of uninitialized aligner.");
|
||||
updateConfiguration(thunkPointer,configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
if(thunkPointer == 0)
|
||||
throw new ReviewedStingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
||||
destroy(thunkPointer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||
* @param bases Bases to align.
|
||||
* @return An align
|
||||
*/
|
||||
@Override
|
||||
public Alignment getBestAlignment(final byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new ReviewedStingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized.");
|
||||
return getBestAlignment(thunkPointer,bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best aligned read, chosen randomly from the pile of best alignments.
|
||||
* @param read Read to align.
|
||||
* @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid.
|
||||
* @return Read with injected alignment data.
|
||||
*/
|
||||
@Override
|
||||
public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) {
|
||||
if(bwtFiles.autogenerated)
|
||||
throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
|
||||
return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a iterator of alignments, batched by mapping quality.
|
||||
* @param bases List of bases.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
@Override
|
||||
public Iterable<Alignment[]> getAllAlignments(final byte[] bases) {
|
||||
final BWAPath[] paths = getPaths(bases);
|
||||
return new Iterable<Alignment[]>() {
|
||||
public Iterator<Alignment[]> iterator() {
|
||||
return new Iterator<Alignment[]>() {
|
||||
/**
|
||||
* The last position accessed.
|
||||
*/
|
||||
private int position = 0;
|
||||
|
||||
/**
|
||||
* Whether all alignments have been seen based on the current position.
|
||||
* @return True if any more alignments are pending. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() { return position < paths.length; }
|
||||
|
||||
/**
|
||||
* Return the next cross-section of alignments, based on mapping quality.
|
||||
* @return Array of the next set of alignments of a given mapping quality.
|
||||
*/
|
||||
public Alignment[] next() {
|
||||
if(position >= paths.length)
|
||||
throw new UnsupportedOperationException("Out of alignments to return.");
|
||||
int score = paths[position].score;
|
||||
int startingPosition = position;
|
||||
while(position < paths.length && paths[position].score == score) position++;
|
||||
return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position));
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsupported.
|
||||
*/
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a iterator of aligned reads, batched by mapping quality.
|
||||
* @param read Read to align.
|
||||
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
@Override
|
||||
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) {
|
||||
if(bwtFiles.autogenerated)
|
||||
throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable");
|
||||
final Iterable<Alignment[]> alignments = getAllAlignments(read.getReadBases());
|
||||
return new Iterable<SAMRecord[]>() {
|
||||
public Iterator<SAMRecord[]> iterator() {
|
||||
final Iterator<Alignment[]> alignmentIterator = alignments.iterator();
|
||||
return new Iterator<SAMRecord[]>() {
|
||||
/**
|
||||
* Whether all alignments have been seen based on the current position.
|
||||
* @return True if any more alignments are pending. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() { return alignmentIterator.hasNext(); }
|
||||
|
||||
/**
|
||||
* Return the next cross-section of alignments, based on mapping quality.
|
||||
* @return Array of the next set of alignments of a given mapping quality.
|
||||
*/
|
||||
public SAMRecord[] next() {
|
||||
Alignment[] alignmentsOfQuality = alignmentIterator.next();
|
||||
SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length];
|
||||
for(int i = 0; i < alignmentsOfQuality.length; i++) {
|
||||
reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader);
|
||||
}
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsupported.
|
||||
*/
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the paths associated with the given base string.
|
||||
* @param bases List of bases.
|
||||
* @return A set of paths through the BWA.
|
||||
*/
|
||||
public BWAPath[] getPaths(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new ReviewedStingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
|
||||
return getPaths(thunkPointer,bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a pointer to the BWA/C thunk.
|
||||
* @param files BWT source files.
|
||||
* @param configuration Configuration of the aligner.
|
||||
* @return Pointer to the BWA/C thunk.
|
||||
*/
|
||||
protected native long create(BWTFiles files, BWAConfiguration configuration);
|
||||
|
||||
/**
|
||||
* Update the configuration passed to the BWA aligner. For internal use only.
|
||||
* @param thunkPointer pointer to BWA object.
|
||||
* @param configuration New configuration to set.
|
||||
*/
|
||||
protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration);
|
||||
|
||||
/**
|
||||
* Destroy the BWA/C thunk.
|
||||
* @param thunkPointer Pointer to the allocated thunk.
|
||||
*/
|
||||
protected native void destroy(long thunkPointer);
|
||||
|
||||
/**
|
||||
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @param paths Paths through the current BWT.
|
||||
* @return A list of alignments.
|
||||
*/
|
||||
protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) {
|
||||
if(thunkPointer == 0)
|
||||
throw new ReviewedStingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized.");
|
||||
return convertPathsToAlignments(thunkPointer,bases,paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return A list of paths through the specified BWT.
|
||||
*/
|
||||
protected native BWAPath[] getPaths(long thunkPointer, byte[] bases);
|
||||
|
||||
/**
|
||||
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||
* Call this method's convertPathsToAlignments() wrapper (above) instead.
|
||||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @param paths Paths through the current BWT.
|
||||
* @return A list of alignments.
|
||||
*/
|
||||
protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
|
||||
|
||||
/**
|
||||
* Gets the best alignment from BWA/C, randomly selected from all best-aligned reads.
|
||||
* @param thunkPointer Pointer to BWA thunk.
|
||||
* @param bases bases to align.
|
||||
* @return The best alignment from BWA/C.
|
||||
*/
|
||||
protected native Alignment getBestAlignment(long thunkPointer, byte[] bases);
|
||||
}
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment.bwa.c;
|
||||
|
||||
/**
|
||||
* Models a BWA path.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAPath {
|
||||
/**
|
||||
* Number of mismatches encountered along this path.
|
||||
*/
|
||||
public final int numMismatches;
|
||||
|
||||
/**
|
||||
* Number of gap opens encountered along this path.
|
||||
*/
|
||||
public final int numGapOpens;
|
||||
|
||||
/**
|
||||
* Number of gap extensions along this path.
|
||||
*/
|
||||
public final int numGapExtensions;
|
||||
|
||||
/**
|
||||
* Whether this alignment was found on the positive or negative strand.
|
||||
*/
|
||||
public final boolean negativeStrand;
|
||||
|
||||
/**
|
||||
* Starting coordinate in the BWT.
|
||||
*/
|
||||
public final long k;
|
||||
|
||||
/**
|
||||
* Ending coordinate in the BWT.
|
||||
*/
|
||||
public final long l;
|
||||
|
||||
/**
|
||||
* The score of this path.
|
||||
*/
|
||||
public final int score;
|
||||
|
||||
/**
|
||||
* The number of best alignments seen along this path.
|
||||
*/
|
||||
public final int bestCount;
|
||||
|
||||
/**
|
||||
* The number of second best alignments seen along this path.
|
||||
*/
|
||||
public final int secondBestCount;
|
||||
|
||||
/**
|
||||
* Create a new path with the given attributes.
|
||||
* @param numMismatches Number of mismatches along path.
|
||||
* @param numGapOpens Number of gap opens along path.
|
||||
* @param numGapExtensions Number of gap extensions along path.
|
||||
* @param k Index to first coordinate within BWT.
|
||||
* @param l Index to last coordinate within BWT.
|
||||
* @param score Score of this alignment. Not the mapping quality.
|
||||
*/
|
||||
public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) {
|
||||
this.numMismatches = numMismatches;
|
||||
this.numGapOpens = numGapOpens;
|
||||
this.numGapExtensions = numGapExtensions;
|
||||
this.negativeStrand = negativeStrand;
|
||||
this.k = k;
|
||||
this.l = l;
|
||||
this.score = score;
|
||||
this.bestCount = bestCount;
|
||||
this.secondBestCount = secondBestCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import org.broadinstitute.sting.alignment.Aligner;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
|
||||
/**
|
||||
* A test harness to ensure that the perfect aligner works.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignerTestHarness {
|
||||
public static void main( String argv[] ) throws FileNotFoundException {
|
||||
if( argv.length != 6 ) {
|
||||
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
File referenceFile = new File(argv[0]);
|
||||
File bwtFile = new File(argv[1]);
|
||||
File rbwtFile = new File(argv[2]);
|
||||
File suffixArrayFile = new File(argv[3]);
|
||||
File reverseSuffixArrayFile = new File(argv[4]);
|
||||
File bamFile = new File(argv[5]);
|
||||
|
||||
align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile);
|
||||
}
|
||||
|
||||
private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
|
||||
Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
|
||||
int count = 0;
|
||||
|
||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
int mismatches = 0;
|
||||
int failures = 0;
|
||||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
if( count > 200000 ) break;
|
||||
//if( count < 366000 ) continue;
|
||||
//if( count > 2 ) break;
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
||||
// continue;
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
|
||||
// continue;
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") )
|
||||
// continue;
|
||||
|
||||
SAMRecord alignmentCleaned = null;
|
||||
try {
|
||||
alignmentCleaned = (SAMRecord)read.clone();
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new ReviewedStingException("SAMRecord clone not supported", ex);
|
||||
}
|
||||
|
||||
if( alignmentCleaned.getReadNegativeStrandFlag() )
|
||||
alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases()));
|
||||
|
||||
alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
|
||||
alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||
alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
|
||||
alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
|
||||
|
||||
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
|
||||
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
||||
|
||||
Iterable<Alignment[]> alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases());
|
||||
if(!alignments.iterator().hasNext() ) {
|
||||
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||
System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
|
||||
failures++;
|
||||
}
|
||||
|
||||
Alignment foundAlignment = null;
|
||||
for(Alignment[] alignmentsOfQuality: alignments) {
|
||||
for(Alignment alignment: alignmentsOfQuality) {
|
||||
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
||||
continue;
|
||||
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
|
||||
continue;
|
||||
|
||||
foundAlignment = alignment;
|
||||
}
|
||||
}
|
||||
|
||||
if( foundAlignment != null ) {
|
||||
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
||||
}
|
||||
else {
|
||||
System.out.printf("Error aligning read %s%n", read.getReadName());
|
||||
|
||||
mismatches++;
|
||||
|
||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||
|
||||
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
||||
read.getAlignmentStart(),
|
||||
read.getReadNegativeStrandFlag());
|
||||
int numDeletions = numDeletionsInCigar(read.getCigar());
|
||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
|
||||
System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
|
||||
|
||||
for(Alignment[] alignmentsOfQuality: alignments) {
|
||||
for(Alignment alignment: alignmentsOfQuality) {
|
||||
System.out.println();
|
||||
|
||||
Cigar cigar = ((BWAAlignment)alignment).getCigar();
|
||||
|
||||
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
|
||||
|
||||
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
|
||||
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
|
||||
alignment.getAlignmentStart(),
|
||||
alignment.isNegativeStrand());
|
||||
}
|
||||
}
|
||||
|
||||
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
|
||||
}
|
||||
|
||||
|
||||
if( count % 1000 == 0 )
|
||||
System.out.printf("%d reads examined.%n",count);
|
||||
}
|
||||
|
||||
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
|
||||
}
|
||||
|
||||
private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) {
|
||||
StringBuilder formatted = new StringBuilder();
|
||||
int readIndex = 0;
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
if(cigarElement.getOperator() == toBlank) {
|
||||
int number = cigarElement.getLength();
|
||||
while( number-- > 0 ) formatted.append(' ');
|
||||
}
|
||||
else {
|
||||
int number = cigarElement.getLength();
|
||||
while( number-- > 0 ) formatted.append(bases.charAt(readIndex++));
|
||||
}
|
||||
}
|
||||
return formatted.toString();
|
||||
}
|
||||
|
||||
private static int numDeletionsInCigar( Cigar cigar ) {
|
||||
int numDeletions = 0;
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
if(cigarElement.getOperator() == CigarOperator.DELETION)
|
||||
numDeletions += cigarElement.getLength();
|
||||
}
|
||||
return numDeletions;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,151 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Deque;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
||||
/**
|
||||
* Represents a sequence of matches.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignmentMatchSequence implements Cloneable {
|
||||
/**
|
||||
* Stores the particular match entries in the order they occur.
|
||||
*/
|
||||
private Deque<AlignmentMatchSequenceEntry> entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||
|
||||
/**
|
||||
* Clone the given match sequence.
|
||||
* @return A deep copy of the current match sequence.
|
||||
*/
|
||||
public AlignmentMatchSequence clone() {
|
||||
AlignmentMatchSequence copy = null;
|
||||
try {
|
||||
copy = (AlignmentMatchSequence)super.clone();
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new ReviewedStingException("Unable to clone AlignmentMatchSequence.");
|
||||
}
|
||||
|
||||
copy.entries = new ArrayDeque<AlignmentMatchSequenceEntry>();
|
||||
for( AlignmentMatchSequenceEntry entry: entries )
|
||||
copy.entries.add(entry.clone());
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
public Cigar convertToCigar(boolean negativeStrand) {
|
||||
Cigar cigar = new Cigar();
|
||||
Iterator<AlignmentMatchSequenceEntry> iterator = negativeStrand ? entries.descendingIterator() : entries.iterator();
|
||||
while( iterator.hasNext() ) {
|
||||
AlignmentMatchSequenceEntry entry = iterator.next();
|
||||
CigarOperator operator;
|
||||
switch( entry.getAlignmentState() ) {
|
||||
case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break;
|
||||
case INSERTION: operator = CigarOperator.INSERTION; break;
|
||||
case DELETION: operator = CigarOperator.DELETION; break;
|
||||
default: throw new ReviewedStingException("convertToCigar: cannot process state: " + entry.getAlignmentState());
|
||||
}
|
||||
cigar.add( new CigarElement(entry.count,operator) );
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
/**
|
||||
* All a new alignment of the given state.
|
||||
* @param state State to add to the sequence.
|
||||
*/
|
||||
public void addNext( AlignmentState state ) {
|
||||
AlignmentMatchSequenceEntry last = entries.peekLast();
|
||||
// If the last entry is the same as this one, increment it. Otherwise, add a new entry.
|
||||
if( last != null && last.alignmentState == state )
|
||||
last.increment();
|
||||
else
|
||||
entries.add(new AlignmentMatchSequenceEntry(state));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current state of this alignment (what's the state of the last base?)
|
||||
* @return State of the most recently aligned base.
|
||||
*/
|
||||
public AlignmentState getCurrentState() {
|
||||
if( entries.size() == 0 )
|
||||
return AlignmentState.MATCH_MISMATCH;
|
||||
return entries.peekLast().getAlignmentState();
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bases in the read match the given state.
|
||||
* @param state State to test.
|
||||
* @return number of bases which match that state.
|
||||
*/
|
||||
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||
int matches = 0;
|
||||
for( AlignmentMatchSequenceEntry entry: entries ) {
|
||||
if( entry.getAlignmentState() == state )
|
||||
matches += entry.count;
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores an individual match sequence entry.
|
||||
*/
|
||||
private class AlignmentMatchSequenceEntry implements Cloneable {
|
||||
/**
|
||||
* The state of the alignment throughout a given point in the sequence.
|
||||
*/
|
||||
private final AlignmentState alignmentState;
|
||||
|
||||
/**
|
||||
* The number of bases having this particular state.
|
||||
*/
|
||||
private int count;
|
||||
|
||||
/**
|
||||
* Create a new sequence entry with the given state.
|
||||
* @param alignmentState The state that this sequence should contain.
|
||||
*/
|
||||
AlignmentMatchSequenceEntry( AlignmentState alignmentState ) {
|
||||
this.alignmentState = alignmentState;
|
||||
this.count = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clone the given match sequence entry.
|
||||
* @return A deep copy of the current match sequence entry.
|
||||
*/
|
||||
public AlignmentMatchSequenceEntry clone() {
|
||||
try {
|
||||
return (AlignmentMatchSequenceEntry)super.clone();
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new ReviewedStingException("Unable to clone AlignmentMatchSequenceEntry.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the current state of the alignment.
|
||||
* @return The state of the current sequence.
|
||||
*/
|
||||
AlignmentState getAlignmentState() {
|
||||
return alignmentState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the count of alignments having this particular state.
|
||||
*/
|
||||
void increment() {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
/**
|
||||
* The state of a given base in the alignment.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public enum AlignmentState {
|
||||
MATCH_MISMATCH,
|
||||
INSERTION,
|
||||
DELETION
|
||||
}
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import net.sf.samtools.Cigar;
|
||||
|
||||
/**
|
||||
* An alignment object to be used incrementally as the BWA aligner
|
||||
* inspects the read.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAAlignment extends Alignment implements Cloneable {
|
||||
/**
|
||||
* Track the number of alignments that have been created.
|
||||
*/
|
||||
private static long numCreated;
|
||||
|
||||
/**
|
||||
* Which number alignment is this?
|
||||
*/
|
||||
private long creationNumber;
|
||||
|
||||
/**
|
||||
* The aligner performing the alignments.
|
||||
*/
|
||||
protected BWAJavaAligner aligner;
|
||||
|
||||
/**
|
||||
* The sequence of matches/mismatches/insertions/deletions.
|
||||
*/
|
||||
private AlignmentMatchSequence alignmentMatchSequence = new AlignmentMatchSequence();
|
||||
|
||||
/**
|
||||
* Working variable. How many bases have been matched at this point.
|
||||
*/
|
||||
protected int position;
|
||||
|
||||
/**
|
||||
* Working variable. How many mismatches have been encountered at this point.
|
||||
*/
|
||||
private int mismatches;
|
||||
|
||||
/**
|
||||
* Number of gap opens in alignment.
|
||||
*/
|
||||
private int gapOpens;
|
||||
|
||||
/**
|
||||
* Number of gap extensions in alignment.
|
||||
*/
|
||||
private int gapExtensions;
|
||||
|
||||
/**
|
||||
* Working variable. The lower bound of the alignment within the BWT.
|
||||
*/
|
||||
protected long loBound;
|
||||
|
||||
/**
|
||||
* Working variable. The upper bound of the alignment within the BWT.
|
||||
*/
|
||||
protected long hiBound;
|
||||
|
||||
protected void setAlignmentStart(long position) {
|
||||
this.alignmentStart = position;
|
||||
}
|
||||
|
||||
protected void setNegativeStrand(boolean negativeStrand) {
|
||||
this.negativeStrand = negativeStrand;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache the score.
|
||||
*/
|
||||
private int score;
|
||||
|
||||
public Cigar getCigar() {
|
||||
return alignmentMatchSequence.convertToCigar(isNegativeStrand());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current state of this alignment (state of the last base viewed)..
|
||||
* @return Current state of the alignment.
|
||||
*/
|
||||
public AlignmentState getCurrentState() {
|
||||
return alignmentMatchSequence.getCurrentState();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given state to the current alignment.
|
||||
* @param state State to add to the given alignment.
|
||||
*/
|
||||
public void addState( AlignmentState state ) {
|
||||
alignmentMatchSequence.addNext(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the BWA score of this alignment.
|
||||
* @return BWA-style scores. 0 is best.
|
||||
*/
|
||||
public int getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
public int getMismatches() { return mismatches; }
|
||||
public int getGapOpens() { return gapOpens; }
|
||||
public int getGapExtensions() { return gapExtensions; }
|
||||
|
||||
public void incrementMismatches() {
|
||||
this.mismatches++;
|
||||
updateScore();
|
||||
}
|
||||
|
||||
public void incrementGapOpens() {
|
||||
this.gapOpens++;
|
||||
updateScore();
|
||||
}
|
||||
|
||||
public void incrementGapExtensions() {
|
||||
this.gapExtensions++;
|
||||
updateScore();
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the score based on new information about matches / mismatches.
|
||||
*/
|
||||
private void updateScore() {
|
||||
score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new alignment with the given parent aligner.
|
||||
* @param aligner Aligner being used.
|
||||
*/
|
||||
public BWAAlignment( BWAJavaAligner aligner ) {
|
||||
this.aligner = aligner;
|
||||
this.creationNumber = numCreated++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clone the alignment.
|
||||
* @return New instance of the alignment.
|
||||
*/
|
||||
public BWAAlignment clone() {
|
||||
BWAAlignment newAlignment = null;
|
||||
try {
|
||||
newAlignment = (BWAAlignment)super.clone();
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new ReviewedStingException("Unable to clone BWAAlignment.");
|
||||
}
|
||||
newAlignment.creationNumber = numCreated++;
|
||||
newAlignment.alignmentMatchSequence = alignmentMatchSequence.clone();
|
||||
|
||||
return newAlignment;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bases in the read match the given state.
|
||||
* @param state State to test.
|
||||
* @return number of bases which match that state.
|
||||
*/
|
||||
public int getNumberOfBasesMatchingState(AlignmentState state) {
|
||||
return alignmentMatchSequence.getNumberOfBasesMatchingState(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare this alignment to another alignment.
|
||||
* @param rhs Other alignment to which to compare.
|
||||
* @return < 0 if this < other, == 0 if this == other, > 0 if this > other
|
||||
*/
|
||||
public int compareTo(Alignment rhs) {
|
||||
BWAAlignment other = (BWAAlignment)rhs;
|
||||
|
||||
// If the scores are different, disambiguate using the score.
|
||||
if(score != other.score)
|
||||
return score > other.score ? 1 : -1;
|
||||
|
||||
// Otherwise, use the order in which the elements were created.
|
||||
if(creationNumber != other.creationNumber)
|
||||
return creationNumber > other.creationNumber ? -1 : 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("position: %d, strand: %b, state: %s, mismatches: %d, gap opens: %d, gap extensions: %d, loBound: %d, hiBound: %d, score: %d, creationNumber: %d", position, negativeStrand, alignmentMatchSequence.getCurrentState(), mismatches, gapOpens, gapExtensions, loBound, hiBound, getScore(), creationNumber);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,392 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
/**
|
||||
* Create imperfect alignments from the read to the genome represented by the given BWT / suffix array.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAJavaAligner extends BWAAligner {
|
||||
/**
|
||||
* BWT in the forward direction.
|
||||
*/
|
||||
private BWT forwardBWT;
|
||||
|
||||
/**
|
||||
* BWT in the reverse direction.
|
||||
*/
|
||||
private BWT reverseBWT;
|
||||
|
||||
/**
|
||||
* Suffix array in the forward direction.
|
||||
*/
|
||||
private SuffixArray forwardSuffixArray;
|
||||
|
||||
/**
|
||||
* Suffix array in the reverse direction.
|
||||
*/
|
||||
private SuffixArray reverseSuffixArray;
|
||||
|
||||
/**
|
||||
* Maximum edit distance (-n option from original BWA).
|
||||
*/
|
||||
private final int MAXIMUM_EDIT_DISTANCE = 4;
|
||||
|
||||
/**
|
||||
* Maximum number of gap opens (-o option from original BWA).
|
||||
*/
|
||||
private final int MAXIMUM_GAP_OPENS = 1;
|
||||
|
||||
/**
|
||||
* Maximum number of gap extensions (-e option from original BWA).
|
||||
*/
|
||||
private final int MAXIMUM_GAP_EXTENSIONS = 6;
|
||||
|
||||
/**
|
||||
* Penalty for straight mismatches (-M option from original BWA).
|
||||
*/
|
||||
public final int MISMATCH_PENALTY = 3;
|
||||
|
||||
/**
|
||||
* Penalty for gap opens (-O option from original BWA).
|
||||
*/
|
||||
public final int GAP_OPEN_PENALTY = 11;
|
||||
|
||||
/**
|
||||
* Penalty for gap extensions (-E option from original BWA).
|
||||
*/
|
||||
public final int GAP_EXTENSION_PENALTY = 4;
|
||||
|
||||
/**
|
||||
* Skip the ends of indels.
|
||||
*/
|
||||
public final int INDEL_END_SKIP = 5;
|
||||
|
||||
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||
super(null,null);
|
||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
|
||||
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read();
|
||||
}
|
||||
|
||||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
throw new UnsupportedOperationException("BWA aligner can't currently be closed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the current parameters of this aligner.
|
||||
* @param configuration New configuration to set.
|
||||
*/
|
||||
public void updateConfiguration(BWAConfiguration configuration) {
|
||||
throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||
* @param bases Bases to align.
|
||||
* @return An align
|
||||
*/
|
||||
public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||
|
||||
/**
|
||||
* Align the read to the reference.
|
||||
* @param read Read to align.
|
||||
* @param header Optional header to drop in place.
|
||||
* @return A list of the alignments.
|
||||
*/
|
||||
public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||
|
||||
/**
|
||||
* Get a iterator of alignments, batched by mapping quality.
|
||||
* @param bases List of bases.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterable<Alignment[]> getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||
|
||||
/**
|
||||
* Get a iterator of aligned reads, batched by mapping quality.
|
||||
* @param read Read to align.
|
||||
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterable<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||
|
||||
|
||||
public List<Alignment> align( SAMRecord read ) {
|
||||
List<Alignment> successfulMatches = new ArrayList<Alignment>();
|
||||
|
||||
Byte[] uncomplementedBases = normalizeBases(read.getReadBases());
|
||||
Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases())));
|
||||
|
||||
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
|
||||
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
|
||||
|
||||
// Seed the best score with any score that won't overflow on comparison.
|
||||
int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
|
||||
int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
|
||||
int maxDiff = MAXIMUM_EDIT_DISTANCE;
|
||||
|
||||
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
||||
|
||||
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
||||
// set as the entire BWT.
|
||||
alignments.add(createSeedAlignment(reverseBWT));
|
||||
alignments.add(createSeedAlignment(forwardBWT));
|
||||
|
||||
while(!alignments.isEmpty()) {
|
||||
BWAAlignment alignment = alignments.remove();
|
||||
|
||||
// From bwtgap.c in the original BWT; if the rank is worse than the best score + the mismatch PENALTY, move on.
|
||||
if( alignment.getScore() > bestScore + MISMATCH_PENALTY )
|
||||
break;
|
||||
|
||||
Byte[] bases = alignment.isNegativeStrand() ? complementedBases : uncomplementedBases;
|
||||
BWT bwt = alignment.isNegativeStrand() ? forwardBWT : reverseBWT;
|
||||
List<LowerBound> lowerBounds = alignment.isNegativeStrand() ? reverseLowerBounds : forwardLowerBounds;
|
||||
|
||||
// if z < D(i) then return {}
|
||||
int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions();
|
||||
if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value )
|
||||
continue;
|
||||
|
||||
if(mismatches == 0) {
|
||||
exactMatch(alignment,bases,bwt);
|
||||
if(alignment.loBound > alignment.hiBound)
|
||||
continue;
|
||||
}
|
||||
|
||||
// Found a valid alignment; store it and move on.
|
||||
if(alignment.position >= read.getReadLength()-1) {
|
||||
for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) {
|
||||
BWAAlignment finalAlignment = alignment.clone();
|
||||
|
||||
if( finalAlignment.isNegativeStrand() )
|
||||
finalAlignment.setAlignmentStart(forwardSuffixArray.get(bwtIndex) + 1);
|
||||
else {
|
||||
int sizeAlongReference = read.getReadLength() -
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) +
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
finalAlignment.setAlignmentStart(reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1);
|
||||
}
|
||||
|
||||
successfulMatches.add(finalAlignment);
|
||||
|
||||
bestScore = Math.min(finalAlignment.getScore(),bestScore);
|
||||
bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff);
|
||||
maxDiff = bestDiff + 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position].byteValue() : "");
|
||||
/*
|
||||
System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(),
|
||||
alignment.negativeStrand?1:0,
|
||||
bases.length-alignment.position-1,
|
||||
alignment.getCurrentState().toString().charAt(0),
|
||||
alignment.getMismatches(),
|
||||
alignment.getGapOpens(),
|
||||
alignment.getGapExtensions(),
|
||||
lowerBounds.get(alignment.position+1).value,
|
||||
lowerBounds.get(alignment.position+1).width,
|
||||
alignment.loBound,
|
||||
alignment.hiBound);
|
||||
*/
|
||||
|
||||
// Temporary -- look ahead to see if the next alignment is bounded.
|
||||
boolean allowDifferences = mismatches > 0;
|
||||
boolean allowMismatches = mismatches > 0;
|
||||
|
||||
if( allowDifferences &&
|
||||
alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() &&
|
||||
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) {
|
||||
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
|
||||
if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) {
|
||||
// Add a potential insertion extension.
|
||||
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||
insertionAlignment.incrementGapOpens();
|
||||
alignments.add(insertionAlignment);
|
||||
|
||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||
deletionAlignment.incrementGapOpens();
|
||||
alignments.addAll(deletionAlignments);
|
||||
}
|
||||
}
|
||||
else if( alignment.getCurrentState() == AlignmentState.INSERTION ) {
|
||||
if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||
// Add a potential insertion extension.
|
||||
BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
|
||||
insertionAlignment.incrementGapExtensions();
|
||||
alignments.add(insertionAlignment);
|
||||
}
|
||||
}
|
||||
else if( alignment.getCurrentState() == AlignmentState.DELETION ) {
|
||||
if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
|
||||
// Add a potential deletion by marking a deletion and augmenting the position.
|
||||
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
|
||||
for( BWAAlignment deletionAlignment: deletionAlignments )
|
||||
deletionAlignment.incrementGapExtensions();
|
||||
alignments.addAll(deletionAlignments);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mismatches
|
||||
alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches));
|
||||
}
|
||||
|
||||
return successfulMatches;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an seeding alignment to use as a starting point when traversing.
|
||||
* @param bwt source BWT.
|
||||
* @return Seed alignment.
|
||||
*/
|
||||
private BWAAlignment createSeedAlignment(BWT bwt) {
|
||||
BWAAlignment seed = new BWAAlignment(this);
|
||||
seed.setNegativeStrand(bwt == forwardBWT);
|
||||
seed.position = -1;
|
||||
seed.loBound = 0;
|
||||
seed.hiBound = bwt.length();
|
||||
return seed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new alignments representing direct matches / mismatches.
|
||||
* @param bwt Source BWT with which to work.
|
||||
* @param alignment Alignment for the previous position.
|
||||
* @param bases The bases in the read.
|
||||
* @param allowMismatch Should mismatching bases be allowed?
|
||||
* @return New alignment representing this position if valid; null otherwise.
|
||||
*/
|
||||
private List<BWAAlignment> createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) {
|
||||
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
||||
|
||||
List<Byte> baseChoices = new ArrayList<Byte>();
|
||||
Byte thisBase = bases[alignment.position+1];
|
||||
|
||||
if( allowMismatch )
|
||||
baseChoices.addAll(Bases.allOf());
|
||||
else
|
||||
baseChoices.add(thisBase);
|
||||
|
||||
if( thisBase != null ) {
|
||||
// Keep rotating the current base to the last position until we've hit the current base.
|
||||
for( ;; ) {
|
||||
baseChoices.add(baseChoices.remove(0));
|
||||
if( thisBase.equals(baseChoices.get(baseChoices.size()-1)) )
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for(byte base: baseChoices) {
|
||||
BWAAlignment newAlignment = alignment.clone();
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
|
||||
// If this alignment is valid, skip it.
|
||||
if( newAlignment.loBound > newAlignment.hiBound )
|
||||
continue;
|
||||
|
||||
newAlignment.position++;
|
||||
newAlignment.addState(AlignmentState.MATCH_MISMATCH);
|
||||
if( bases[newAlignment.position] == null || base != bases[newAlignment.position] )
|
||||
newAlignment.incrementMismatches();
|
||||
|
||||
newAlignments.add(newAlignment);
|
||||
}
|
||||
|
||||
return newAlignments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new alignment representing an insertion at this point in the read.
|
||||
* @param alignment Alignment from which to derive the insertion.
|
||||
* @return New alignment reflecting the insertion.
|
||||
*/
|
||||
private BWAAlignment createInsertionAlignment( BWAAlignment alignment ) {
|
||||
// Add a potential insertion extension.
|
||||
BWAAlignment newAlignment = alignment.clone();
|
||||
newAlignment.position++;
|
||||
newAlignment.addState(AlignmentState.INSERTION);
|
||||
return newAlignment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create new alignments representing a deletion at this point in the read.
|
||||
* @param bwt source BWT for inferring deletion info.
|
||||
* @param alignment Alignment from which to derive the deletion.
|
||||
* @return New alignments reflecting all possible deletions.
|
||||
*/
|
||||
private List<BWAAlignment> createDeletionAlignments( BWT bwt, BWAAlignment alignment) {
|
||||
List<BWAAlignment> newAlignments = new ArrayList<BWAAlignment>();
|
||||
for(byte base: Bases.instance) {
|
||||
BWAAlignment newAlignment = alignment.clone();
|
||||
|
||||
newAlignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
newAlignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
|
||||
// If this alignment is valid, skip it.
|
||||
if( newAlignment.loBound > newAlignment.hiBound )
|
||||
continue;
|
||||
|
||||
newAlignment.addState(AlignmentState.DELETION);
|
||||
|
||||
newAlignments.add(newAlignment);
|
||||
}
|
||||
|
||||
return newAlignments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exactly match the given alignment against the given BWT.
|
||||
* @param alignment Alignment to match.
|
||||
* @param bases Bases to use.
|
||||
* @param bwt BWT to use.
|
||||
*/
|
||||
private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) {
|
||||
while( ++alignment.position < bases.length ) {
|
||||
byte base = bases[alignment.position];
|
||||
alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||
alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||
if( alignment.loBound > alignment.hiBound )
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Make each base into A/C/G/T or null if unknown.
|
||||
* @param bases Base string to normalize.
|
||||
* @return Array of normalized bases.
|
||||
*/
|
||||
private Byte[] normalizeBases( byte[] bases ) {
|
||||
Byte[] normalBases = new Byte[bases.length];
|
||||
for(int i = 0; i < bases.length; i++)
|
||||
normalBases[i] = Bases.fromASCII(bases[i]);
|
||||
return normalBases;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||
|
||||
/**
|
||||
* At any point along the given read, what is a good lower bound for the
|
||||
* total number of differences?
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class LowerBound {
|
||||
/**
|
||||
* Lower bound of the suffix array.
|
||||
*/
|
||||
public final long loIndex;
|
||||
|
||||
/**
|
||||
* Upper bound of the suffix array.
|
||||
*/
|
||||
public final long hiIndex;
|
||||
|
||||
/**
|
||||
* Width of the bwt from loIndex -> hiIndex, inclusive.
|
||||
*/
|
||||
public final long width;
|
||||
|
||||
/**
|
||||
* The lower bound at the given point.
|
||||
*/
|
||||
public final int value;
|
||||
|
||||
/**
|
||||
* Create a new lower bound with the given value.
|
||||
* @param loIndex The lower bound of the BWT.
|
||||
* @param hiIndex The upper bound of the BWT.
|
||||
* @param value Value for the lower bound at this site.
|
||||
*/
|
||||
private LowerBound(long loIndex, long hiIndex, int value) {
|
||||
this.loIndex = loIndex;
|
||||
this.hiIndex = hiIndex;
|
||||
this.width = hiIndex - loIndex + 1;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper.
|
||||
* @param bases Bases of the read to use when creating a new BWT.
|
||||
* @param bwt BWT to check against.
|
||||
* @return A list of lower bounds at every point in the reference.
|
||||
*
|
||||
*/
|
||||
public static List<LowerBound> create(Byte[] bases, BWT bwt) {
|
||||
List<LowerBound> bounds = new ArrayList<LowerBound>();
|
||||
|
||||
long loIndex = 0, hiIndex = bwt.length();
|
||||
int mismatches = 0;
|
||||
for( int i = bases.length-1; i >= 0; i-- ) {
|
||||
Byte base = bases[i];
|
||||
|
||||
// Ignore non-ACGT bases.
|
||||
if( base != null ) {
|
||||
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
|
||||
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
|
||||
}
|
||||
|
||||
if( base == null || loIndex > hiIndex ) {
|
||||
loIndex = 0;
|
||||
hiIndex = bwt.length();
|
||||
mismatches++;
|
||||
}
|
||||
bounds.add(0,new LowerBound(loIndex,hiIndex,mismatches));
|
||||
}
|
||||
|
||||
return bounds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a string representation of this bound.
|
||||
* @return String version of this bound.
|
||||
*/
|
||||
public String toString() {
|
||||
return String.format("LowerBound: w = %d, value = %d",width,value);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Analyses used to validate the correctness and performance the BWA Java bindings.
|
||||
*/
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Writes .amb files - a file indicating where 'holes' (indeterminant bases)
|
||||
* exist in the contig. Currently, only empty, placeholder AMBs are supported.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AMBWriter {
|
||||
/**
|
||||
* Number of holes is fixed at zero.
|
||||
*/
|
||||
private static final int NUM_HOLES = 0;
|
||||
|
||||
/**
|
||||
* Input stream from which to read BWT data.
|
||||
*/
|
||||
private final PrintStream out;
|
||||
|
||||
/**
|
||||
* Create a new ANNWriter targeting the given file.
|
||||
* @param file file into which ANN data should be written.
|
||||
* @throws java.io.IOException if there is a problem opening the output file.
|
||||
*/
|
||||
public AMBWriter(File file) throws IOException {
|
||||
out = new PrintStream(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new ANNWriter targeting the given OutputStream.
|
||||
* @param stream Stream into which ANN data should be written.
|
||||
*/
|
||||
public AMBWriter(OutputStream stream) {
|
||||
out = new PrintStream(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the contents of the given dictionary into the AMB file.
|
||||
* Assumes that there are no holes in the dictionary.
|
||||
* @param dictionary Dictionary to write.
|
||||
*/
|
||||
public void writeEmpty(SAMSequenceDictionary dictionary) {
|
||||
long genomeLength = 0L;
|
||||
for(SAMSequenceRecord sequence: dictionary.getSequences())
|
||||
genomeLength += sequence.getSequenceLength();
|
||||
|
||||
int sequences = dictionary.getSequences().size();
|
||||
|
||||
// Write the header
|
||||
out.printf("%d %d %d%n",genomeLength,sequences,NUM_HOLES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the given output stream.
|
||||
*/
|
||||
public void close() {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Writes .ann files - an alternate sequence dictionary format
|
||||
* used by BWA/C. For best results, the input sequence dictionary
|
||||
* should be created with Picard's CreateSequenceDictionary.jar,
|
||||
* TRUNCATE_NAMES_AT_WHITESPACE=false.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ANNWriter {
|
||||
/**
|
||||
* BWA uses a fixed seed of 11, written into every file.
|
||||
*/
|
||||
private static final int BNS_SEED = 11;
|
||||
|
||||
/**
|
||||
* A seemingly unused value that appears in every contig in the ANN.
|
||||
*/
|
||||
private static final int GI = 0;
|
||||
|
||||
/**
|
||||
* Input stream from which to read BWT data.
|
||||
*/
|
||||
private final PrintStream out;
|
||||
|
||||
/**
|
||||
* Create a new ANNWriter targeting the given file.
|
||||
* @param file file into which ANN data should be written.
|
||||
* @throws IOException if there is a problem opening the output file.
|
||||
*/
|
||||
public ANNWriter(File file) throws IOException {
|
||||
out = new PrintStream(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new ANNWriter targeting the given OutputStream.
|
||||
* @param stream Stream into which ANN data should be written.
|
||||
*/
|
||||
public ANNWriter(OutputStream stream) {
|
||||
out = new PrintStream(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the contents of the given dictionary into the ANN file.
|
||||
* Assumes that no ambs (blocks of indeterminate base) are present in the dictionary.
|
||||
* @param dictionary Dictionary to write.
|
||||
*/
|
||||
public void write(SAMSequenceDictionary dictionary) {
|
||||
long genomeLength = 0L;
|
||||
for(SAMSequenceRecord sequence: dictionary.getSequences())
|
||||
genomeLength += sequence.getSequenceLength();
|
||||
|
||||
int sequences = dictionary.getSequences().size();
|
||||
|
||||
// Write the header
|
||||
out.printf("%d %d %d%n",genomeLength,sequences,BNS_SEED);
|
||||
|
||||
for(SAMSequenceRecord sequence: dictionary.getSequences()) {
|
||||
String fullSequenceName = sequence.getSequenceName();
|
||||
String trimmedSequenceName = fullSequenceName;
|
||||
String sequenceComment = "(null)";
|
||||
|
||||
long offset = 0;
|
||||
|
||||
// Separate the sequence name from the sequence comment, based on BWA's definition.
|
||||
// BWA's definition appears to accept a zero-length contig name, so mimic that behavior.
|
||||
if(fullSequenceName.indexOf(' ') >= 0) {
|
||||
trimmedSequenceName = fullSequenceName.substring(0,fullSequenceName.indexOf(' '));
|
||||
sequenceComment = fullSequenceName.substring(fullSequenceName.indexOf(' ')+1);
|
||||
}
|
||||
|
||||
// Write the sequence GI (?), name, and comment.
|
||||
out.printf("%d %s %s%n",GI,trimmedSequenceName,sequenceComment);
|
||||
// Write the sequence offset, length, and ambs (currently fixed at 0).
|
||||
out.printf("%d %d %d%n",offset,sequence.getSequenceLength(),0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the given output stream.
|
||||
*/
|
||||
public void close() {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* Represents the Burrows-Wheeler Transform of a reference sequence.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWT {
|
||||
/**
|
||||
* Write an occurrence table after every SEQUENCE_BLOCK_SIZE bases.
|
||||
* For this implementation to behave correctly, SEQUENCE_BLOCK_SIZE % 8 == 0
|
||||
*/
|
||||
public static final int SEQUENCE_BLOCK_SIZE = 128;
|
||||
|
||||
/**
|
||||
* The inverse SA, used as a placeholder for determining where the special EOL character sits.
|
||||
*/
|
||||
protected final long inverseSA0;
|
||||
|
||||
/**
|
||||
* Cumulative counts for the entire BWT.
|
||||
*/
|
||||
protected final Counts counts;
|
||||
|
||||
/**
|
||||
* The individual sequence blocks, modelling how they appear on disk.
|
||||
*/
|
||||
protected final SequenceBlock[] sequenceBlocks;
|
||||
|
||||
/**
|
||||
* Creates a new BWT with the given inverse SA, counts, and sequence (in ASCII).
|
||||
* @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
|
||||
* @param counts Cumulative count of bases, in A,C,G,T order.
|
||||
* @param sequenceBlocks The full BWT sequence, sans the '$'.
|
||||
*/
|
||||
public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
|
||||
this.inverseSA0 = inverseSA0;
|
||||
this.counts = counts;
|
||||
this.sequenceBlocks = sequenceBlocks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new BWT with the given inverse SA, occurrences, and sequence (in ASCII).
|
||||
* @param inverseSA0 Inverse SA entry for the first element. Will be missing from the BWT sequence.
|
||||
* @param counts Count of bases, in A,C,G,T order.
|
||||
* @param sequence The full BWT sequence, sans the '$'.
|
||||
*/
|
||||
public BWT( long inverseSA0, Counts counts, byte[] sequence ) {
|
||||
this(inverseSA0,counts,generateSequenceBlocks(sequence));
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the full sequence from the list of block.
|
||||
* @return The full BWT string as a byte array.
|
||||
*/
|
||||
public byte[] getSequence() {
|
||||
byte[] sequence = new byte[(int)counts.getTotal()];
|
||||
for( SequenceBlock block: sequenceBlocks )
|
||||
System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength);
|
||||
return sequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
|
||||
* @param base The base.
|
||||
* @return Total counts for all bases lexicographically smaller than this base.
|
||||
*/
|
||||
public long counts(byte base) {
|
||||
return counts.getCumulative(base);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total counts of bases lexicographically smaller than the given base, for Ferragina and Manzini's search.
|
||||
* @param base The base.
|
||||
* @param index The position to search within the BWT.
|
||||
* @return Total counts for all bases lexicographically smaller than this base.
|
||||
*/
|
||||
public long occurrences(byte base,long index) {
|
||||
SequenceBlock block = getSequenceBlock(index);
|
||||
int position = getSequencePosition(index);
|
||||
long accumulator = block.occurrences.get(base);
|
||||
for(int i = 0; i <= position; i++) {
|
||||
if(base == block.sequence[i])
|
||||
accumulator++;
|
||||
}
|
||||
return accumulator;
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of bases in the BWT as a whole.
|
||||
* @return Number of bases.
|
||||
*/
|
||||
public long length() {
|
||||
return counts.getTotal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new BWT from the given reference sequence.
|
||||
* @param referenceSequence Sequence from which to derive the BWT.
|
||||
* @return reference sequence-derived BWT.
|
||||
*/
|
||||
public static BWT createFromReferenceSequence(byte[] referenceSequence) {
|
||||
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||
|
||||
byte[] bwt = new byte[(int)suffixArray.length()-1];
|
||||
int bwtIndex = 0;
|
||||
for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) {
|
||||
if(suffixArray.get(suffixArrayIndex) == 0)
|
||||
continue;
|
||||
bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1];
|
||||
}
|
||||
|
||||
return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the base at a given position in the BWT.
|
||||
* @param index The index to use.
|
||||
* @return The base at that location.
|
||||
*/
|
||||
protected byte getBase(long index) {
|
||||
if(index == inverseSA0)
|
||||
throw new ReviewedStingException(String.format("Base at index %d does not have a text representation",index));
|
||||
|
||||
SequenceBlock block = getSequenceBlock(index);
|
||||
int position = getSequencePosition(index);
|
||||
return block.sequence[position];
|
||||
}
|
||||
|
||||
private SequenceBlock getSequenceBlock(long index) {
|
||||
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||
if(index > inverseSA0) index--;
|
||||
return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)];
|
||||
}
|
||||
|
||||
private int getSequencePosition(long index) {
|
||||
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||
if(index > inverseSA0) index--;
|
||||
return (int)(index%SEQUENCE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a set of sequence blocks from one long sequence.
|
||||
* @param sequence Sequence from which to derive blocks.
|
||||
* @return Array of sequence blocks containing data from the sequence.
|
||||
*/
|
||||
private static SequenceBlock[] generateSequenceBlocks( byte[] sequence ) {
|
||||
Counts occurrences = new Counts();
|
||||
|
||||
int numSequenceBlocks = PackUtils.numberOfPartitions(sequence.length,SEQUENCE_BLOCK_SIZE);
|
||||
SequenceBlock[] sequenceBlocks = new SequenceBlock[numSequenceBlocks];
|
||||
|
||||
for( int block = 0; block < numSequenceBlocks; block++ ) {
|
||||
int blockStart = block*SEQUENCE_BLOCK_SIZE;
|
||||
int blockLength = Math.min(SEQUENCE_BLOCK_SIZE, sequence.length-blockStart);
|
||||
byte[] subsequence = new byte[blockLength];
|
||||
|
||||
System.arraycopy(sequence,blockStart,subsequence,0,blockLength);
|
||||
|
||||
sequenceBlocks[block] = new SequenceBlock(blockStart,blockLength,occurrences.clone(),subsequence);
|
||||
|
||||
for( byte base: subsequence )
|
||||
occurrences.increment(base);
|
||||
}
|
||||
|
||||
return sequenceBlocks;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.BasePackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
/**
|
||||
* Reads a BWT from a given file.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWTReader {
|
||||
/**
|
||||
* Input stream from which to read BWT data.
|
||||
*/
|
||||
private FileInputStream inputStream;
|
||||
|
||||
/**
|
||||
* Create a new BWT reader.
|
||||
* @param inputFile File in which the BWT is stored.
|
||||
*/
|
||||
public BWTReader( File inputFile ) {
|
||||
try {
|
||||
this.inputStream = new FileInputStream(inputFile);
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new ReviewedStingException("Unable to open input file", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a BWT from the input stream.
|
||||
* @return The BWT stored in the input stream.
|
||||
*/
|
||||
public BWT read() {
|
||||
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
BasePackedInputStream basePackedInputStream = new BasePackedInputStream<Integer>(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
long inverseSA0;
|
||||
long[] count;
|
||||
SequenceBlock[] sequenceBlocks;
|
||||
|
||||
try {
|
||||
inverseSA0 = uintPackedInputStream.read();
|
||||
count = new long[PackUtils.ALPHABET_SIZE];
|
||||
uintPackedInputStream.read(count);
|
||||
|
||||
long bwtSize = count[PackUtils.ALPHABET_SIZE-1];
|
||||
sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)];
|
||||
|
||||
for( int block = 0; block < sequenceBlocks.length; block++ ) {
|
||||
int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE;
|
||||
int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
|
||||
|
||||
long[] occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||
byte[] bwt = new byte[sequenceLength];
|
||||
|
||||
uintPackedInputStream.read(occurrences);
|
||||
basePackedInputStream.read(bwt);
|
||||
|
||||
sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt);
|
||||
}
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||
}
|
||||
|
||||
return new BWT(inverseSA0, new Counts(count,true), sequenceBlocks);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the input stream.
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
inputStream.close();
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to close input file", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Generate BWA supplementary files (.ann, .amb) from the command line.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWTSupplementaryFileGenerator {
|
||||
enum SupplementaryFileType { ANN, AMB }
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
if(args.length < 3)
|
||||
usage("Incorrect number of arguments supplied");
|
||||
|
||||
File fastaFile = new File(args[0]);
|
||||
File outputFile = new File(args[1]);
|
||||
SupplementaryFileType outputType = null;
|
||||
try {
|
||||
outputType = Enum.valueOf(SupplementaryFileType.class,args[2]);
|
||||
}
|
||||
catch(IllegalArgumentException ex) {
|
||||
usage("Invalid output type: " + args[2]);
|
||||
}
|
||||
|
||||
ReferenceSequenceFile sequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile);
|
||||
SAMSequenceDictionary dictionary = sequenceFile.getSequenceDictionary();
|
||||
|
||||
switch(outputType) {
|
||||
case ANN:
|
||||
ANNWriter annWriter = new ANNWriter(outputFile);
|
||||
annWriter.write(dictionary);
|
||||
annWriter.close();
|
||||
break;
|
||||
case AMB:
|
||||
AMBWriter ambWriter = new AMBWriter(outputFile);
|
||||
ambWriter.writeEmpty(dictionary);
|
||||
ambWriter.close();
|
||||
break;
|
||||
default:
|
||||
usage("Unsupported output type: " + outputType);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Print usage information and exit.
|
||||
*/
|
||||
private static void usage(String message) {
|
||||
System.err.println(message);
|
||||
System.err.println("Usage: BWTSupplementaryFileGenerator <fasta> <output file> <output type>");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.BasePackedOutputStream;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Writes an in-memory BWT to an outputstream.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWTWriter {
|
||||
/**
|
||||
* Input stream from which to read BWT data.
|
||||
*/
|
||||
private final OutputStream outputStream;
|
||||
|
||||
/**
|
||||
* Create a new BWT writer.
|
||||
* @param outputFile File in which the BWT is stored.
|
||||
*/
|
||||
public BWTWriter( File outputFile ) {
|
||||
try {
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new ReviewedStingException("Unable to open output file", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a BWT to the output stream.
|
||||
* @param bwt Transform to be written to the output stream.
|
||||
*/
|
||||
public void write( BWT bwt ) {
|
||||
UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream<Integer>(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
intPackedOutputStream.write(bwt.inverseSA0);
|
||||
intPackedOutputStream.write(bwt.counts.toArray(true));
|
||||
|
||||
for( SequenceBlock block: bwt.sequenceBlocks ) {
|
||||
intPackedOutputStream.write(block.occurrences.toArray(false));
|
||||
basePackedOutputStream.write(block.sequence);
|
||||
}
|
||||
|
||||
// The last block is the last set of counts in the structure.
|
||||
intPackedOutputStream.write(bwt.counts.toArray(false));
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the input stream.
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
outputStream.close();
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to close input file", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Enhanced enum representation of a base.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class Bases implements Iterable<Byte>
|
||||
{
|
||||
public static byte A = 'A';
|
||||
public static byte C = 'C';
|
||||
public static byte G = 'G';
|
||||
public static byte T = 'T';
|
||||
|
||||
public static final Bases instance = new Bases();
|
||||
|
||||
private static final List<Byte> allBases;
|
||||
|
||||
/**
|
||||
* Representation of the base broken down by packed value.
|
||||
*/
|
||||
private static final Map<Integer,Byte> basesByPack = new HashMap<Integer,Byte>();
|
||||
|
||||
static {
|
||||
List<Byte> bases = new ArrayList<Byte>();
|
||||
bases.add(A);
|
||||
bases.add(C);
|
||||
bases.add(G);
|
||||
bases.add(T);
|
||||
allBases = Collections.unmodifiableList(bases);
|
||||
|
||||
for(int i = 0; i < allBases.size(); i++)
|
||||
basesByPack.put(i,allBases.get(i));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new base with the given ascii representation and
|
||||
* pack value.
|
||||
*/
|
||||
private Bases() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all possible bases.
|
||||
* @return Byte representation of all bases.
|
||||
*/
|
||||
public static Collection<Byte> allOf() {
|
||||
return allBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the number of known bases.
|
||||
* @return The number of known bases.
|
||||
*/
|
||||
public static int size() {
|
||||
return allBases.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over the total number of known base types.
|
||||
* @return Iterator over all known bases.
|
||||
*/
|
||||
public Iterator<Byte> iterator() {
|
||||
return basesByPack.values().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the given base from the packed representation.
|
||||
* @param pack Packed representation.
|
||||
* @return base.
|
||||
*/
|
||||
public static byte fromPack( int pack ) { return basesByPack.get(pack); }
|
||||
|
||||
/**
|
||||
* Convert the given base to its packed value.
|
||||
* @param ascii ASCII representation of the base.
|
||||
* @return Packed value.
|
||||
*/
|
||||
public static int toPack( byte ascii )
|
||||
{
|
||||
for( Map.Entry<Integer,Byte> entry: basesByPack.entrySet() ) {
|
||||
if( entry.getValue().equals(ascii) )
|
||||
return entry.getKey();
|
||||
}
|
||||
throw new ReviewedStingException(String.format("Base %c is an invalid base to pack", (char)ascii));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the ASCII representation of a base to its 'normalized' representation.
|
||||
* @param base The base itself.
|
||||
* @return The byte, if present. Null if unknown.
|
||||
*/
|
||||
public static Byte fromASCII( byte base ) {
|
||||
Byte found = null;
|
||||
for( Byte normalized: allBases ) {
|
||||
if( normalized.equals(base) ) {
|
||||
found = normalized;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,151 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Counts of how many bases of each type have been seen.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class Counts implements Cloneable {
|
||||
/**
|
||||
* Internal representation of counts, broken down by ASCII value.
|
||||
*/
|
||||
private Map<Byte,Long> counts = new HashMap<Byte,Long>();
|
||||
|
||||
/**
|
||||
* Internal representation of cumulative counts, broken down by ASCII value.
|
||||
*/
|
||||
private Map<Byte,Long> cumulativeCounts = new HashMap<Byte,Long>();
|
||||
|
||||
/**
|
||||
* Create an empty Counts object with values A=0,C=0,G=0,T=0.
|
||||
*/
|
||||
public Counts()
|
||||
{
|
||||
for(byte base: Bases.instance) {
|
||||
counts.put(base,0L);
|
||||
cumulativeCounts.put(base,0L);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a counts data structure with the given initial values.
|
||||
* @param data Count data, broken down by base.
|
||||
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example).
|
||||
*/
|
||||
public Counts( long[] data, boolean cumulative ) {
|
||||
if(cumulative) {
|
||||
long priorCount = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
long count = data[Bases.toPack(base)];
|
||||
counts.put(base,count-priorCount);
|
||||
cumulativeCounts.put(base,priorCount);
|
||||
priorCount = count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
long priorCount = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
long count = data[Bases.toPack(base)];
|
||||
counts.put(base,count);
|
||||
cumulativeCounts.put(base,priorCount);
|
||||
priorCount += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert to an array for persistence.
|
||||
* @param cumulative Use a cumulative representation.
|
||||
* @return Array of count values.
|
||||
*/
|
||||
public long[] toArray(boolean cumulative) {
|
||||
long[] countArray = new long[counts.size()];
|
||||
if(cumulative) {
|
||||
int index = 0;
|
||||
boolean first = true;
|
||||
for(byte base: Bases.instance) {
|
||||
if(first) {
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
countArray[index++] = getCumulative(base);
|
||||
}
|
||||
countArray[countArray.length-1] = getTotal();
|
||||
}
|
||||
else {
|
||||
int index = 0;
|
||||
for(byte base: Bases.instance)
|
||||
countArray[index++] = counts.get(base);
|
||||
}
|
||||
return countArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a unique copy of the current object.
|
||||
* @return A duplicate of this object.
|
||||
*/
|
||||
public Counts clone() {
|
||||
Counts other;
|
||||
try {
|
||||
other = (Counts)super.clone();
|
||||
}
|
||||
catch(CloneNotSupportedException ex) {
|
||||
throw new ReviewedStingException("Unable to clone counts object", ex);
|
||||
}
|
||||
other.counts = new HashMap<Byte,Long>(counts);
|
||||
other.cumulativeCounts = new HashMap<Byte,Long>(cumulativeCounts);
|
||||
return other;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the number of bases seen at the given location.
|
||||
* @param base Base to increment.
|
||||
*/
|
||||
public void increment(byte base) {
|
||||
counts.put(base,counts.get(base)+1);
|
||||
boolean increment = false;
|
||||
for(byte cumulative: Bases.instance) {
|
||||
if(increment) cumulativeCounts.put(cumulative,cumulativeCounts.get(cumulative)+1);
|
||||
increment |= (cumulative == base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a count of the number of bases seen at a given location.
|
||||
* Note that counts in this case are not cumulative (counts for A,C,G,T
|
||||
* are independent).
|
||||
* @param base Base for which to query counts.
|
||||
* @return Number of bases of this type seen.
|
||||
*/
|
||||
public long get(byte base) {
|
||||
return counts.get(base);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a count of the number of bases seen before this base.
|
||||
* Note that counts in this case are cumulative.
|
||||
* @param base Base for which to query counts.
|
||||
* @return Number of bases of this type seen.
|
||||
*/
|
||||
public long getCumulative(byte base) {
|
||||
return cumulativeCounts.get(base);
|
||||
}
|
||||
|
||||
/**
|
||||
* How many total bases are represented by this count structure?
|
||||
* @return Total bases represented.
|
||||
*/
|
||||
public long getTotal() {
|
||||
int accumulator = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
accumulator += get(base);
|
||||
}
|
||||
return accumulator;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITHoc THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
|
||||
/**
|
||||
* Create a suffix array data structure.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class CreateBWTFromReference {
|
||||
private byte[] loadReference( File inputFile ) {
|
||||
// Read in the first sequence in the input file
|
||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||
ReferenceSequence sequence = reference.nextSequence();
|
||||
return sequence.getBases();
|
||||
}
|
||||
|
||||
private byte[] loadReverseReference( File inputFile ) {
|
||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||
ReferenceSequence sequence = reference.nextSequence();
|
||||
PackUtils.reverse(sequence.getBases());
|
||||
return sequence.getBases();
|
||||
}
|
||||
|
||||
private Counts countOccurrences( byte[] sequence ) {
|
||||
Counts occurrences = new Counts();
|
||||
for( byte base: sequence )
|
||||
occurrences.increment(base);
|
||||
return occurrences;
|
||||
}
|
||||
|
||||
private long[] createSuffixArray( byte[] sequence ) {
|
||||
return SuffixArray.createFromReferenceSequence(sequence).sequence;
|
||||
}
|
||||
|
||||
private long[] invertSuffixArray( long[] suffixArray ) {
|
||||
long[] inverseSuffixArray = new long[suffixArray.length];
|
||||
for( int i = 0; i < suffixArray.length; i++ )
|
||||
inverseSuffixArray[(int)suffixArray[i]] = i;
|
||||
return inverseSuffixArray;
|
||||
}
|
||||
|
||||
private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) {
|
||||
long[] compressedSuffixArray = new long[suffixArray.length];
|
||||
compressedSuffixArray[0] = inverseSuffixArray[0];
|
||||
for( int i = 1; i < suffixArray.length; i++ )
|
||||
compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1];
|
||||
return compressedSuffixArray;
|
||||
}
|
||||
|
||||
private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
|
||||
long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length];
|
||||
for( int i = 0; i < compressedSuffixArray.length; i++ )
|
||||
inverseCompressedSuffixArray[compressedSuffixArray[i]] = i;
|
||||
return inverseCompressedSuffixArray;
|
||||
}
|
||||
|
||||
public static void main( String argv[] ) throws IOException {
|
||||
if( argv.length != 5 ) {
|
||||
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output rbwt> <output sa> <output rsa>");
|
||||
return;
|
||||
}
|
||||
|
||||
String inputFileName = argv[0];
|
||||
File inputFile = new File(inputFileName);
|
||||
|
||||
String bwtFileName = argv[1];
|
||||
File bwtFile = new File(bwtFileName);
|
||||
|
||||
String rbwtFileName = argv[2];
|
||||
File rbwtFile = new File(rbwtFileName);
|
||||
|
||||
String saFileName = argv[3];
|
||||
File saFile = new File(saFileName);
|
||||
|
||||
String rsaFileName = argv[4];
|
||||
File rsaFile = new File(rsaFileName);
|
||||
|
||||
CreateBWTFromReference creator = new CreateBWTFromReference();
|
||||
|
||||
byte[] sequence = creator.loadReference(inputFile);
|
||||
byte[] reverseSequence = creator.loadReverseReference(inputFile);
|
||||
|
||||
// Count the occurences of each given base.
|
||||
Counts occurrences = creator.countOccurrences(sequence);
|
||||
System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences.getCumulative(Bases.A),
|
||||
occurrences.getCumulative(Bases.C),
|
||||
occurrences.getCumulative(Bases.G),
|
||||
occurrences.getCumulative(Bases.T));
|
||||
|
||||
// Generate the suffix array and print diagnostics.
|
||||
long[] suffixArrayData = creator.createSuffixArray(sequence);
|
||||
long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence);
|
||||
|
||||
// Invert the suffix array and print diagnostics.
|
||||
long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData);
|
||||
long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData);
|
||||
|
||||
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
||||
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
||||
|
||||
/*
|
||||
// Create the data structure for the compressed suffix array and print diagnostics.
|
||||
int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray);
|
||||
int reconstructedInverseSA = compressedSuffixArray[0];
|
||||
for( int i = 0; i < 8; i++ ) {
|
||||
System.out.printf("compressedSuffixArray[%d] = %d (SA-1[%d] = %d)%n", i, compressedSuffixArray[i], i, reconstructedInverseSA);
|
||||
reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA];
|
||||
}
|
||||
|
||||
// Create the data structure for the inverse compressed suffix array and print diagnostics.
|
||||
int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray);
|
||||
for( int i = 0; i < 8; i++ ) {
|
||||
System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]);
|
||||
}
|
||||
*/
|
||||
|
||||
// Create the BWT.
|
||||
BWT bwt = BWT.createFromReferenceSequence(sequence);
|
||||
BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence);
|
||||
|
||||
byte[] bwtSequence = bwt.getSequence();
|
||||
System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length());
|
||||
|
||||
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||
bwtWriter.write(bwt);
|
||||
bwtWriter.close();
|
||||
|
||||
BWTWriter reverseBWTWriter = new BWTWriter(rbwtFile);
|
||||
reverseBWTWriter.write(reverseBWT);
|
||||
reverseBWTWriter.close();
|
||||
|
||||
/*
|
||||
SuffixArrayWriter saWriter = new SuffixArrayWriter(saFile);
|
||||
saWriter.write(suffixArray);
|
||||
saWriter.close();
|
||||
|
||||
SuffixArrayWriter reverseSAWriter = new SuffixArrayWriter(rsaFile);
|
||||
reverseSAWriter.write(reverseSuffixArray);
|
||||
reverseSAWriter.close();
|
||||
*/
|
||||
|
||||
File existingBWTFile = new File(inputFileName+".bwt");
|
||||
BWTReader existingBWTReader = new BWTReader(existingBWTFile);
|
||||
BWT existingBWT = existingBWTReader.read();
|
||||
|
||||
byte[] existingBWTSequence = existingBWT.getSequence();
|
||||
System.out.printf("Existing BWT: %s... (length = %d)%n",new String(existingBWTSequence,0,80),existingBWT.length());
|
||||
|
||||
for( int i = 0; i < bwt.length(); i++ ) {
|
||||
if( bwtSequence[i] != existingBWTSequence[i] )
|
||||
throw new ReviewedStingException("BWT mismatch at " + i);
|
||||
}
|
||||
|
||||
File existingSAFile = new File(inputFileName+".sa");
|
||||
SuffixArrayReader existingSuffixArrayReader = new SuffixArrayReader(existingSAFile,existingBWT);
|
||||
SuffixArray existingSuffixArray = existingSuffixArrayReader.read();
|
||||
|
||||
for(int i = 0; i < suffixArray.length(); i++) {
|
||||
if( i % 10000 == 0 )
|
||||
System.out.printf("Validating suffix array entry %d%n", i);
|
||||
if( suffixArray.get(i) != existingSuffixArray.get(i) )
|
||||
throw new ReviewedStingException(String.format("Suffix array mismatch at %d; SA is %d; should be %d",i,existingSuffixArray.get(i),suffixArray.get(i)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
/**
|
||||
* Models a block of bases within the BWT.
|
||||
*/
|
||||
public class SequenceBlock {
|
||||
/**
|
||||
* Start position of this sequence within the BWT.
|
||||
*/
|
||||
public final int sequenceStart;
|
||||
|
||||
/**
|
||||
* Length of this sequence within the BWT.
|
||||
*/
|
||||
public final int sequenceLength;
|
||||
|
||||
|
||||
/**
|
||||
* Occurrences of each letter up to this sequence block.
|
||||
*/
|
||||
public final Counts occurrences;
|
||||
|
||||
/**
|
||||
* Sequence for this segment.
|
||||
*/
|
||||
public final byte[] sequence;
|
||||
|
||||
/**
|
||||
* Create a new block within this BWT.
|
||||
* @param sequenceStart Starting position of this sequence within the BWT.
|
||||
* @param sequenceLength Length of this sequence.
|
||||
* @param occurrences How many of each base has been seen before this sequence began.
|
||||
* @param sequence The actual sequence from the BWT.
|
||||
*/
|
||||
public SequenceBlock( int sequenceStart, int sequenceLength, Counts occurrences, byte[] sequence ) {
|
||||
this.sequenceStart = sequenceStart;
|
||||
this.sequenceLength = sequenceLength;
|
||||
this.occurrences = occurrences;
|
||||
this.sequence = sequence;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* An in-memory representation of a suffix array.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SuffixArray {
|
||||
public final long inverseSA0;
|
||||
public final Counts occurrences;
|
||||
|
||||
/**
|
||||
* The elements of the sequence actually stored in memory.
|
||||
*/
|
||||
protected final long[] sequence;
|
||||
|
||||
/**
|
||||
* How often are individual elements in the sequence actually stored
|
||||
* in memory, as opposed to being calculated on the fly?
|
||||
*/
|
||||
protected final int sequenceInterval;
|
||||
|
||||
/**
|
||||
* The BWT used to calculate missing portions of the sequence.
|
||||
*/
|
||||
protected final BWT bwt;
|
||||
|
||||
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) {
|
||||
this(inverseSA0,occurrences,sequence,1,null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new sequence array with the given inverse SA, occurrences, and values.
|
||||
* @param inverseSA0 Inverse SA entry for the first element.
|
||||
* @param occurrences Cumulative number of occurrences of A,C,G,T, in order.
|
||||
* @param sequence The full suffix array.
|
||||
* @param sequenceInterval How frequently is the sequence interval stored.
|
||||
* @param bwt bwt used to infer the remaining entries in the BWT.
|
||||
*/
|
||||
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) {
|
||||
this.inverseSA0 = inverseSA0;
|
||||
this.occurrences = occurrences;
|
||||
this.sequence = sequence;
|
||||
this.sequenceInterval = sequenceInterval;
|
||||
this.bwt = bwt;
|
||||
|
||||
if(sequenceInterval != 1 && bwt == null)
|
||||
throw new ReviewedStingException("A BWT must be provided if the sequence interval is not 1");
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the length of the sequence array.
|
||||
* @return Length of the suffix array.
|
||||
*/
|
||||
public long length() {
|
||||
if( bwt != null )
|
||||
return bwt.length()+1;
|
||||
else
|
||||
return sequence.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the suffix array value at a given sequence.
|
||||
* @param index Index at which to retrieve the suffix array vaule.
|
||||
* @return The suffix array value at that entry.
|
||||
*/
|
||||
public long get(long index) {
|
||||
int iterations = 0;
|
||||
while(index%sequenceInterval != 0) {
|
||||
// The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case.
|
||||
if(index == inverseSA0)
|
||||
index = 0;
|
||||
else {
|
||||
byte base = bwt.getBase(index);
|
||||
index = bwt.counts(base) + bwt.occurrences(base,index);
|
||||
}
|
||||
iterations++;
|
||||
}
|
||||
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a suffix array from a given reference sequence.
|
||||
* @param sequence The reference sequence to use when building the suffix array.
|
||||
* @return a constructed suffix array.
|
||||
*/
|
||||
public static SuffixArray createFromReferenceSequence(byte[] sequence) {
|
||||
// The builder for the suffix array. Use an integer in this case because
|
||||
// Java arrays can only hold an integer.
|
||||
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>(new SuffixArrayComparator(sequence));
|
||||
|
||||
Counts occurrences = new Counts();
|
||||
for( byte base: sequence )
|
||||
occurrences.increment(base);
|
||||
|
||||
// Build out the suffix array using a custom comparator.
|
||||
for( int i = 0; i <= sequence.length; i++ )
|
||||
suffixArrayBuilder.add(i);
|
||||
|
||||
// Copy the suffix array into an array.
|
||||
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||
int i = 0;
|
||||
for( Integer element: suffixArrayBuilder )
|
||||
suffixArray[i++] = element;
|
||||
|
||||
// Find the first element in the inverse suffix array.
|
||||
long inverseSA0 = -1;
|
||||
for(i = 0; i < suffixArray.length; i++) {
|
||||
if(suffixArray[i] == 0)
|
||||
inverseSA0 = i;
|
||||
}
|
||||
if(inverseSA0 < 0)
|
||||
throw new ReviewedStingException("Unable to find first inverse SA entry in generated suffix array.");
|
||||
|
||||
return new SuffixArray(inverseSA0,occurrences,suffixArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
||||
* first in lexicographic order.
|
||||
*/
|
||||
private static class SuffixArrayComparator implements Comparator<Integer> {
|
||||
/**
|
||||
* The data source for all suffix arrays.
|
||||
*/
|
||||
private final String sequence;
|
||||
|
||||
/**
|
||||
* Create a new comparator.
|
||||
* @param sequence Reference sequence to use as basis for comparison.
|
||||
*/
|
||||
public SuffixArrayComparator( byte[] sequence ) {
|
||||
// Processing the suffix array tends to be easier as a string.
|
||||
this.sequence = StringUtil.bytesToString(sequence);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
||||
* the two substrings sequence[lhs:], sequence[rhs:].
|
||||
* @param lhs Left-hand side of comparison.
|
||||
* @param rhs Right-hand side of comparison.
|
||||
* @return How the suffix arrays represented by lhs, rhs compare.
|
||||
*/
|
||||
public int compare( Integer lhs, Integer rhs ) {
|
||||
String lhsSuffixArray = sequence.substring(lhs);
|
||||
String rhsSuffixArray = sequence.substring(rhs);
|
||||
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* A reader for suffix arrays in permanent storage.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SuffixArrayReader {
|
||||
/**
|
||||
* Input stream from which to read suffix array data.
|
||||
*/
|
||||
private FileInputStream inputStream;
|
||||
|
||||
/**
|
||||
* BWT to use to fill in missing data.
|
||||
*/
|
||||
private BWT bwt;
|
||||
|
||||
/**
|
||||
* Create a new suffix array reader.
|
||||
* @param inputFile File in which the suffix array is stored.
|
||||
* @param bwt BWT to use when filling in missing data.
|
||||
*/
|
||||
public SuffixArrayReader(File inputFile, BWT bwt) {
|
||||
try {
|
||||
this.inputStream = new FileInputStream(inputFile);
|
||||
this.bwt = bwt;
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new ReviewedStingException("Unable to open input file", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a suffix array from the input stream.
|
||||
* @return The suffix array stored in the input stream.
|
||||
*/
|
||||
public SuffixArray read() {
|
||||
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
long inverseSA0;
|
||||
long[] occurrences;
|
||||
long[] suffixArray;
|
||||
int suffixArrayInterval;
|
||||
|
||||
try {
|
||||
inverseSA0 = uintPackedInputStream.read();
|
||||
occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||
uintPackedInputStream.read(occurrences);
|
||||
// Throw away the suffix array size in bytes and use the occurrences table directly.
|
||||
suffixArrayInterval = (int)uintPackedInputStream.read();
|
||||
suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)];
|
||||
uintPackedInputStream.read(suffixArray);
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||
}
|
||||
|
||||
return new SuffixArray(inverseSA0, new Counts(occurrences,true), suffixArray, suffixArrayInterval, bwt);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Close the input stream.
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
inputStream.close();
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to close input file", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
package org.broadinstitute.sting.alignment.reference.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.UnsignedIntPackedOutputStream;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Javadoc goes here.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SuffixArrayWriter {
|
||||
/**
|
||||
* Input stream from which to read suffix array data.
|
||||
*/
|
||||
private OutputStream outputStream;
|
||||
|
||||
/**
|
||||
* Create a new suffix array reader.
|
||||
* @param outputFile File in which the suffix array is stored.
|
||||
*/
|
||||
public SuffixArrayWriter( File outputFile ) {
|
||||
try {
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new ReviewedStingException("Unable to open input file", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a suffix array to the output stream.
|
||||
* @param suffixArray suffix array to write.
|
||||
*/
|
||||
public void write(SuffixArray suffixArray) {
|
||||
UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
uintPackedOutputStream.write(suffixArray.inverseSA0);
|
||||
uintPackedOutputStream.write(suffixArray.occurrences.toArray(true));
|
||||
// How frequently the suffix array entry is placed.
|
||||
uintPackedOutputStream.write(1);
|
||||
// Length of the suffix array.
|
||||
uintPackedOutputStream.write(suffixArray.length()-1);
|
||||
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1);
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to read BWT from input stream.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Close the input stream.
|
||||
*/
|
||||
public void close() {
|
||||
try {
|
||||
outputStream.close();
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new ReviewedStingException("Unable to close input file", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
|
||||
/**
|
||||
* Reads a packed version of the input stream.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BasePackedInputStream<T> {
|
||||
/**
|
||||
* Type of object to unpack.
|
||||
*/
|
||||
private final Class<T> type;
|
||||
|
||||
/**
|
||||
* Ultimate source for packed bases.
|
||||
*/
|
||||
private final FileInputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* Channel source for packed bases.
|
||||
*/
|
||||
private final FileChannel targetInputChannel;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for word-packed data.
|
||||
*/
|
||||
private final ByteOrder byteOrder;
|
||||
|
||||
/**
|
||||
* How many bases are in a given packed word.
|
||||
*/
|
||||
private final int basesPerPackedWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE;
|
||||
|
||||
/**
|
||||
* How many bytes in an integer?
|
||||
*/
|
||||
private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE;
|
||||
|
||||
|
||||
public BasePackedInputStream( Class<T> type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
this(type,new FileInputStream(inputFile),byteOrder);
|
||||
}
|
||||
|
||||
public BasePackedInputStream( Class<T> type, FileInputStream inputStream, ByteOrder byteOrder ) {
|
||||
if( type != Integer.class )
|
||||
throw new ReviewedStingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName());
|
||||
this.type = type;
|
||||
this.targetInputStream = inputStream;
|
||||
this.targetInputChannel = inputStream.getChannel();
|
||||
this.byteOrder = byteOrder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the entire contents of the input stream.
|
||||
* @param bwt array into which bases should be read.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read(byte[] bwt) throws IOException {
|
||||
read(bwt,0,bwt.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the next <code>length</code> bases into the bwt array, starting at the given offset.
|
||||
* @param bwt array holding the given data.
|
||||
* @param offset target position in the bases array into which bytes should be written.
|
||||
* @param length number of bases to read from the stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read(byte[] bwt, int offset, int length) throws IOException {
|
||||
int bufferWidth = ((bwt.length+basesPerPackedWord-1)/basesPerPackedWord)*bytesPerInteger;
|
||||
ByteBuffer buffer = ByteBuffer.allocate(bufferWidth).order(byteOrder);
|
||||
targetInputChannel.read(buffer);
|
||||
targetInputChannel.position(targetInputChannel.position()+buffer.remaining());
|
||||
buffer.flip();
|
||||
|
||||
int packedWord = 0;
|
||||
int i = 0;
|
||||
while(i < length) {
|
||||
if(i % basesPerPackedWord == 0) packedWord = buffer.getInt();
|
||||
int position = basesPerPackedWord - i%basesPerPackedWord - 1;
|
||||
bwt[offset+i++] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* A general-purpose stream for writing packed bases.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BasePackedOutputStream<T> {
|
||||
/**
|
||||
* Type of object to pack.
|
||||
*/
|
||||
private final Class<T> type;
|
||||
|
||||
/**
|
||||
* How many bases can be stored in the given data structure?
|
||||
*/
|
||||
private final int basesPerType;
|
||||
|
||||
/**
|
||||
* Ultimate target for the packed bases.
|
||||
*/
|
||||
private final OutputStream targetOutputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for word-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
public BasePackedOutputStream( Class<T> type, File outputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
this(type,new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write packed bases to the given output stream.
|
||||
* @param type Type of data to pack bases into.
|
||||
* @param outputStream Output stream to which to write packed bases.
|
||||
* @param byteOrder Switch between big endian / little endian when reading / writing files.
|
||||
*/
|
||||
public BasePackedOutputStream( Class<T> type, OutputStream outputStream, ByteOrder byteOrder) {
|
||||
this.targetOutputStream = outputStream;
|
||||
this.type = type;
|
||||
basesPerType = PackUtils.bitsInType(type)/PackUtils.BITS_PER_BASE;
|
||||
this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the given base to the output stream. Will write only this base; no packing will be performed.
|
||||
* @param base List of bases to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int base ) throws IOException {
|
||||
write( new byte[] { (byte)base } );
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes an array of bases to the target output stream.
|
||||
* @param bases List of bases to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( byte[] bases ) throws IOException {
|
||||
write(bases,0,bases.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a subset of the array of bases to the output stream.
|
||||
* @param bases List of bases to write.
|
||||
* @param offset site at which to start writing.
|
||||
* @param length number of bases to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( byte[] bases, int offset, int length ) throws IOException {
|
||||
int packedBases = 0;
|
||||
int positionInPack = 0;
|
||||
|
||||
for( int base = offset; base < offset+length; base++ ) {
|
||||
packedBases = packBase(bases[base], packedBases, positionInPack);
|
||||
|
||||
// Increment the packed counter. If all possible bases have been squeezed into this byte, write it out.
|
||||
positionInPack = ++positionInPack % basesPerType;
|
||||
if( positionInPack == 0 ) {
|
||||
writePackedBases(packedBases);
|
||||
packedBases = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if( positionInPack > 0 )
|
||||
writePackedBases(packedBases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush the contents of the OutputStream to disk.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void flush() throws IOException {
|
||||
targetOutputStream.flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the given output stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
targetOutputStream.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Pack the given base into the basepack.
|
||||
* @param base The base to pack.
|
||||
* @param basePack Target for the pack operation.
|
||||
* @param position Position within the pack to which to add the base.
|
||||
* @return The packed integer.
|
||||
*/
|
||||
private int packBase( byte base, int basePack, int position ) {
|
||||
basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1));
|
||||
return basePack;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the given packed base structure to the output file.
|
||||
* @param packedBases Packed bases to write.
|
||||
* @throws IOException on error writing to the file.
|
||||
*/
|
||||
private void writePackedBases(int packedBases) throws IOException {
|
||||
buffer.rewind();
|
||||
if( type == Integer.class )
|
||||
buffer.putInt(packedBases);
|
||||
else if( type == Byte.class )
|
||||
buffer.put((byte)packedBases);
|
||||
else
|
||||
throw new ReviewedStingException("Cannot pack bases into type " + type.getName());
|
||||
targetOutputStream.write(buffer.array());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Generate a .PAC file from a given reference.
|
||||
*
|
||||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class CreatePACFromReference {
|
||||
public static void main( String argv[] ) throws IOException {
|
||||
if( argv.length != 3 ) {
|
||||
System.out.println("USAGE: CreatePACFromReference <input>.fasta <output pac> <output rpac>");
|
||||
return;
|
||||
}
|
||||
|
||||
// Read in the first sequence in the input file
|
||||
String inputFileName = argv[0];
|
||||
File inputFile = new File(inputFileName);
|
||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||
ReferenceSequence sequence = reference.nextSequence();
|
||||
|
||||
// Target file for output
|
||||
PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() );
|
||||
|
||||
// Reverse the bases in the reference
|
||||
PackUtils.reverse(sequence.getBases());
|
||||
|
||||
// Target file for output
|
||||
PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() );
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Utilities designed for packing / unpacking bases.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class PackUtils {
|
||||
/**
|
||||
* How many possible bases can be encoded?
|
||||
*/
|
||||
public static final int ALPHABET_SIZE = 4;
|
||||
|
||||
/**
|
||||
* How many bits does it take to store a single base?
|
||||
*/
|
||||
public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2));
|
||||
|
||||
/**
|
||||
* How many bits fit into a single byte?
|
||||
*/
|
||||
public static final int BITS_PER_BYTE = 8;
|
||||
|
||||
/**
|
||||
* Writes a reference sequence to a PAC file.
|
||||
* @param outputFile Filename for the PAC file.
|
||||
* @param referenceSequence Reference sequence to write.
|
||||
* @throws IOException If there's a problem writing to the output file.
|
||||
*/
|
||||
public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException {
|
||||
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||
|
||||
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||
basePackedOutputStream.write(referenceSequence);
|
||||
|
||||
outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE);
|
||||
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* How many bits can a given type hold?
|
||||
* @param type Type to test.
|
||||
* @return Number of bits that the given type can hold.
|
||||
*/
|
||||
public static int bitsInType( Class<?> type ) {
|
||||
try {
|
||||
long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1;
|
||||
long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1;
|
||||
if( typeSize > intTypeSize )
|
||||
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName());
|
||||
return (int)(Math.log(typeSize)/Math.log(2));
|
||||
}
|
||||
catch( NoSuchFieldException ex ) {
|
||||
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||
}
|
||||
catch( IllegalAccessException ex ) {
|
||||
throw new ReviewedStingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b.
|
||||
* @param base ASCII value for the base to pack.
|
||||
* @return A byte from 0-3 indicating the base's packed value.
|
||||
*/
|
||||
public static byte packBase(byte base) {
|
||||
switch( base ) {
|
||||
case 'A':
|
||||
return 0;
|
||||
case 'C':
|
||||
return 1;
|
||||
case 'G':
|
||||
return 2;
|
||||
case 'T':
|
||||
return 3;
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown base type: " + base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a two-bit representation of a base into an ASCII representation of a base.
|
||||
* @param pack Byte from 0-3 indicating which base is represented.
|
||||
* @return An ASCII value representing the packed base.
|
||||
*/
|
||||
public static byte unpackBase(byte pack) {
|
||||
switch( pack ) {
|
||||
case 0:
|
||||
return 'A';
|
||||
case 1:
|
||||
return 'C';
|
||||
case 2:
|
||||
return 'G';
|
||||
case 3:
|
||||
return 'T';
|
||||
default:
|
||||
throw new ReviewedStingException("Unknown pack type: " + pack);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses an unpacked sequence of bases.
|
||||
* @param bases bases to reverse.
|
||||
*/
|
||||
public static void reverse( byte[] bases ) {
|
||||
for( int i = 0, j = bases.length-1; i < j; i++, j-- ) {
|
||||
byte temp = bases[j];
|
||||
bases[j] = bases[i];
|
||||
bases[i] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a structure of size <code>size</code> that should be split
|
||||
* into <code>partitionSize</code> partitions, how many partitions should
|
||||
* be created? Size of last partition will be <= partitionSize.
|
||||
* @param size Total size of the data structure.
|
||||
* @param partitionSize Size of an individual partition.
|
||||
* @return Number of partitions that would be created.
|
||||
*/
|
||||
public static int numberOfPartitions( long size, long partitionSize ) {
|
||||
return (int)((size+partitionSize-1) / partitionSize);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
|
||||
/**
|
||||
* Read a set of integers packed into
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class UnsignedIntPackedInputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
private final FileInputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* Target channel from which to pull file data.
|
||||
*/
|
||||
private final FileChannel targetInputChannel;
|
||||
|
||||
/**
|
||||
* The byte order in which integer input data appears.
|
||||
*/
|
||||
private final ByteOrder byteOrder;
|
||||
|
||||
/**
|
||||
* How many bytes are required to store an integer?
|
||||
*/
|
||||
private final int bytesPerInteger = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE;
|
||||
|
||||
/**
|
||||
* Create a new PackedIntInputStream, writing to the given target file.
|
||||
* @param inputFile target input file.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws java.io.IOException if an I/O error occurs.
|
||||
*/
|
||||
public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileInputStream(inputFile),byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read ints from the given InputStream.
|
||||
* @param inputStream Input stream from which to read ints.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
*/
|
||||
public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) {
|
||||
this.targetInputStream = inputStream;
|
||||
this.targetInputChannel = inputStream.getChannel();
|
||||
this.byteOrder = byteOrder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a datum from the input stream.
|
||||
* @return The next input datum in the stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public long read() throws IOException {
|
||||
long[] data = new long[1];
|
||||
read(data);
|
||||
return data[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the data from the input stream.
|
||||
* @param data placeholder for input data.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read( long[] data ) throws IOException {
|
||||
read( data, 0, data.length );
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the data from the input stream, starting at the given offset.
|
||||
* @param data placeholder for input data.
|
||||
* @param offset place in the array to start reading in data.
|
||||
* @param length number of ints to read in.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read( long[] data, int offset, int length ) throws IOException {
|
||||
ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder);
|
||||
|
||||
targetInputChannel.read(readBuffer,targetInputChannel.position());
|
||||
readBuffer.flip();
|
||||
targetInputChannel.position(targetInputChannel.position()+readBuffer.remaining());
|
||||
|
||||
int i = 0;
|
||||
while(i < length)
|
||||
data[offset+i++] = readBuffer.getInt() & 0xFFFFFFFFL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the given output stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
targetInputStream.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.alignment.reference.packing;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Writes an list of integers to the output file.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class UnsignedIntPackedOutputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
private final OutputStream targetOutputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for int-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
/**
|
||||
* Create a new PackedIntOutputStream, writing to the given target file.
|
||||
* @param outputFile target output file.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileOutputStream(outputFile),byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write packed ints to the given OutputStream.
|
||||
* @param outputStream Output stream to which to write packed ints.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
*/
|
||||
public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
|
||||
this.targetOutputStream = outputStream;
|
||||
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the data to the output stream.
|
||||
* @param datum datum to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( long datum ) throws IOException {
|
||||
buffer.rewind();
|
||||
buffer.putInt((int)datum);
|
||||
targetOutputStream.write(buffer.array());
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the data to the output stream.
|
||||
* @param data data to write. occurrences.length must match alphabet size.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( long[] data ) throws IOException {
|
||||
for(long datum: data)
|
||||
write(datum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the given chunk of data to the input stream.
|
||||
* @param data data to write.
|
||||
* @param offset position at which to start.
|
||||
* @param length number of ints to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( long[] data, int offset, int length ) throws IOException {
|
||||
for( int i = offset; i < offset+length; i++ )
|
||||
write(data[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush the contents of the OutputStream to disk.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void flush() throws IOException {
|
||||
targetOutputStream.flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the given output stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
targetOutputStream.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Create a mask for use with the PickSequenomProbes walker.
|
||||
*/
|
||||
public class CreateSequenomMask extends RodWalker<Integer, Integer> {
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
public void initialize() {}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
int result = 0;
|
||||
for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) {
|
||||
if ( vc.isSNP() ) {
|
||||
GenomeLoc loc = context.getLocation();
|
||||
out.println(loc.getContig() + "\t" + (loc.getStart()-1) + "\t" + loc.getStop());
|
||||
result = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
logger.info("Found " + sum + " masking sites.");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,334 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.bed.BEDCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
||||
/**
|
||||
* Generates Sequenom probe information given a single variant track. Emitted is the variant
|
||||
* along with the 200 reference bases on each side of the variant.
|
||||
*/
|
||||
@WalkerName("PickSequenomProbes")
|
||||
@Requires(value={DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
public class PickSequenomProbes extends RodWalker<String, String> {
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
@Argument(required=false, shortName="snp_mask", doc="positions to be masked with N's")
|
||||
protected String SNP_MASK = null;
|
||||
@Argument(required=false, shortName="project_id",doc="If specified, all probenames will be prepended with 'project_id|'")
|
||||
String project_id = null;
|
||||
@Argument(required = false, shortName="omitWindow", doc = "If specified, the window appender will be omitted from the design files (e.g. \"_chr:start-stop\")")
|
||||
boolean omitWindow = false;
|
||||
@Argument(required = false, fullName="usePlinkRODNamingConvention", shortName="nameConvention",doc="Use the naming convention defined in PLINKROD")
|
||||
boolean useNamingConvention = false;
|
||||
@Argument(required = false, fullName="noMaskWindow",shortName="nmw",doc="Do not mask bases within X bases of an event when designing probes")
|
||||
int noMaskWindow = 0;
|
||||
@Argument(required = false, shortName="counter", doc = "If specified, unique count id (ordinal number) is added to the end of each assay name")
|
||||
boolean addCounter = false;
|
||||
|
||||
private byte [] maskFlags = new byte[401];
|
||||
|
||||
private LocationAwareSeekableRODIterator snpMaskIterator=null;
|
||||
|
||||
private GenomeLoc positionOfLastVariant = null;
|
||||
|
||||
private int cnt = 0;
|
||||
private int discarded = 0;
|
||||
|
||||
VariantCollection VCs ; // will keep a set of distinct variants at a given site
|
||||
private List<GenomeLoc> processedVariantsInScope = new LinkedList<GenomeLoc>();
|
||||
|
||||
public void initialize() {
|
||||
if ( SNP_MASK != null ) {
|
||||
logger.info("Loading SNP mask... ");
|
||||
ReferenceOrderedData snp_mask;
|
||||
//if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||
RMDTrack track = builder.createInstanceOfTrack(BEDCodec.class, new File(SNP_MASK));
|
||||
snpMaskIterator = new SeekableRODIterator(track.getHeader(),
|
||||
track.getSequenceDictionary(),
|
||||
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
track.getIterator());
|
||||
//} else {
|
||||
// // TODO: fix me when Plink is back
|
||||
// throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)");
|
||||
//}
|
||||
|
||||
}
|
||||
VCs = new VariantCollection();
|
||||
}
|
||||
|
||||
|
||||
public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return "";
|
||||
|
||||
logger.debug("Probing " + ref.getLocus() + " " + ref.getWindow());
|
||||
|
||||
VCs.clear();
|
||||
VCs.addAll( tracker.getAllVariantContexts(ref), ref.getLocus() );
|
||||
|
||||
discarded += VCs.discarded();
|
||||
|
||||
if ( VCs.size() == 0 ) {
|
||||
logger.debug(" Context empty");
|
||||
return "";
|
||||
}
|
||||
|
||||
if ( VCs.size() > 1 ) {
|
||||
logger.debug(" "+VCs.size()+ " variants at the locus");
|
||||
}
|
||||
|
||||
// System.out.print("At locus "+ref.getLocus()+": ");
|
||||
// for ( VariantContext vc : VCs ) {
|
||||
// System.out.println(vc.toString());
|
||||
// }
|
||||
|
||||
// little optimization: since we may have few events at the current site on the reference,
|
||||
// we are going to make sure we compute the mask and ref bases only once for each location and only if we need to
|
||||
boolean haveMaskForWindow = false;
|
||||
boolean haveBasesForWindow = false;
|
||||
String leading_bases = null;
|
||||
String trailing_bases = null;
|
||||
|
||||
StringBuilder assaysForLocus = new StringBuilder(""); // all assays for current locus will be collected here (will be multi-line if multiple events are assayed)
|
||||
|
||||
// get all variant contexts!!!!
|
||||
for ( VariantContext vc : VCs ) {
|
||||
|
||||
// we can only deal with biallelic sites for now
|
||||
if ( !vc.isBiallelic() ) {
|
||||
logger.debug(" Not biallelic; skipped");
|
||||
continue;
|
||||
}
|
||||
|
||||
// we don't want to see the same multi-base event (deletion, DNP etc) multiple times.
|
||||
// All the vcs we are currently seeing are clearly on the same contig as the current reference
|
||||
// poisiton (or we would not see them at all!). All we need to check is if the vc starts at the
|
||||
// current reference position (i.e. it is the first time we see it) or not (i.e. we saw it already).
|
||||
if ( ref.getLocus().getStart() != vc.getStart() )
|
||||
continue;
|
||||
|
||||
if ( ! haveMaskForWindow ) {
|
||||
String contig = context.getLocation().getContig();
|
||||
int offset = context.getLocation().getStart();
|
||||
int true_offset = offset - 200;
|
||||
|
||||
// we have variant; let's load all the snps falling into the current window and prepare the mask array.
|
||||
// we need to do it only once per window, regardless of how many vcs we may have at this location!
|
||||
if ( snpMaskIterator != null ) {
|
||||
// clear the mask
|
||||
for ( int i = 0 ; i < 401; i++ )
|
||||
maskFlags[i] = 0;
|
||||
|
||||
RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200));
|
||||
if ( snpList != null && snpList.size() != 0 ) {
|
||||
Iterator<GATKFeature> snpsInWindow = snpList.iterator();
|
||||
int i = 0;
|
||||
while ( snpsInWindow.hasNext() ) {
|
||||
GenomeLoc snp = snpsInWindow.next().getLocation();
|
||||
// we don't really want to mask out multi-base indels
|
||||
if ( snp.size() > 1 )
|
||||
continue;
|
||||
logger.debug(" SNP at "+snp.getStart());
|
||||
int offsetInWindow = (int)(snp.getStart() - true_offset);
|
||||
maskFlags[offsetInWindow] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
haveMaskForWindow = true; // if we use masking, we will probably need to recompute the window...
|
||||
}
|
||||
|
||||
if ( ! haveBasesForWindow ) {
|
||||
byte[] context_bases = ref.getBases();
|
||||
for (int i = 0; i < 401; i++) {
|
||||
if ( maskFlags[i] == 1 && ( i < 200 - noMaskWindow || i > 200 + getNoMaskWindowRightEnd(vc,noMaskWindow) ) ) {
|
||||
context_bases[i] = 'N';
|
||||
}
|
||||
}
|
||||
leading_bases = new String(Arrays.copyOfRange(context_bases, 0, 200));
|
||||
trailing_bases = new String(Arrays.copyOfRange(context_bases, 201, 401));
|
||||
// masked bases are not gonna change for the current window, unless we use windowed masking;
|
||||
// in the latter case the bases (N's) will depend on the event we are currently looking at,
|
||||
// so we better recompute..
|
||||
if ( noMaskWindow == 0 ) haveBasesForWindow = true;
|
||||
}
|
||||
|
||||
|
||||
// below, build single assay line for the current VC:
|
||||
|
||||
String assay_sequence;
|
||||
if ( vc.isSNP() )
|
||||
assay_sequence = leading_bases + "[" + (char)ref.getBase() + "/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
else if ( vc.isMNP() )
|
||||
assay_sequence = leading_bases + "[" + new String(vc.getReference().getBases()) + "/" + new String(vc.getAlternateAllele(0).getBases())+"]"+trailing_bases.substring(vc.getReference().length()-1);
|
||||
else if ( vc.isInsertion() )
|
||||
assay_sequence = leading_bases + (char)ref.getBase() + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
else if ( vc.isDeletion() )
|
||||
assay_sequence = leading_bases + (char)ref.getBase() + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length());
|
||||
else
|
||||
continue;
|
||||
|
||||
StringBuilder assay_id = new StringBuilder();
|
||||
if ( project_id != null ) {
|
||||
assay_id.append(project_id);
|
||||
assay_id.append('|');
|
||||
}
|
||||
if ( useNamingConvention ) {
|
||||
assay_id.append('c');
|
||||
assay_id.append(context.getLocation().toString().replace(":","_p"));
|
||||
} else {
|
||||
assay_id.append(context.getLocation().toString().replace(':','_'));
|
||||
}
|
||||
if ( vc.isInsertion() ) assay_id.append("_gI");
|
||||
else if ( vc.isDeletion()) assay_id.append("_gD");
|
||||
|
||||
if ( ! omitWindow ) {
|
||||
assay_id.append("_");
|
||||
assay_id.append(ref.getWindow().toString().replace(':', '_'));
|
||||
}
|
||||
++cnt;
|
||||
if ( addCounter ) assay_id.append("_"+cnt);
|
||||
|
||||
assaysForLocus.append(assay_id);
|
||||
assaysForLocus.append('\t');
|
||||
assaysForLocus.append(assay_sequence);
|
||||
assaysForLocus.append('\n');
|
||||
}
|
||||
return assaysForLocus.toString();
|
||||
}
|
||||
|
||||
public String reduceInit() {
|
||||
return "";
|
||||
}
|
||||
|
||||
public String reduce(String data, String sum) {
|
||||
out.print(data);
|
||||
return "";
|
||||
}
|
||||
|
||||
private int getNoMaskWindowRightEnd(VariantContext vc, int window) {
|
||||
if ( window == 0 ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( vc.isInsertion() ) {
|
||||
return window-1;
|
||||
}
|
||||
|
||||
int max = 0;
|
||||
for (Allele a : vc.getAlleles() ) {
|
||||
if ( vc.isInsertion() ) {
|
||||
logger.debug("Getting length of allele "+a.toString()+" it is "+a.getBases().length+" (ref allele is "+vc.getReference().toString()+")");
|
||||
}
|
||||
if ( a.getBases().length > max ) {
|
||||
max = a.getBases().length;
|
||||
}
|
||||
}
|
||||
return max+window-1;
|
||||
}
|
||||
|
||||
public void onTraversalDone(String sum) {
|
||||
logger.info(cnt+" assay seqences generated");
|
||||
logger.info(discarded+" events were found to be duplicates and discarded (no redundant assays generated)");
|
||||
}
|
||||
|
||||
static class EventComparator implements Comparator<VariantContext> {
|
||||
|
||||
public int compare(VariantContext o1, VariantContext o2) {
|
||||
// if variants start at different positions, they are different. All we actually
|
||||
// care about is detecting the variants that are strictly the same; the actual ordering of distinct variants
|
||||
// (which one we deem less and which one greater) is utterly unimportant. We just need to be consistent.
|
||||
if ( o1.getStart() < o2.getStart() ) return -1;
|
||||
if ( o1.getStart() > o2.getStart() ) return 1;
|
||||
|
||||
if ( o1.getType() != o2.getType() ) return o1.getType().compareTo(o2.getType());
|
||||
|
||||
int refComp = o1.getReference().compareTo(o2.getReference());
|
||||
if ( refComp != 0 ) return refComp;
|
||||
|
||||
return o1.getAlternateAllele(0).compareTo(o2.getAlternateAllele(0));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static class VariantCollection implements Iterable<VariantContext> {
|
||||
TreeSet<VariantContext> variants = new TreeSet(new EventComparator());
|
||||
int discarded = 0;
|
||||
|
||||
public void add(VariantContext vc, GenomeLoc current) {
|
||||
if ( vc.getStart() != current.getStart() ) return; // we add only variants that start at current locus
|
||||
// note that we do not check chr here, since the way this class is used, the mathod is always called with
|
||||
// VCs coming from the same metadata tracker, so they simply can not be on different chrs!
|
||||
if ( !vc.isBiallelic() ) {
|
||||
logger.info(" Non-biallelic variant encountered; skipped");
|
||||
return;
|
||||
}
|
||||
if ( variants.add(vc) == false ) discarded++;
|
||||
}
|
||||
|
||||
public void addAll(Collection<VariantContext> c, GenomeLoc current) {
|
||||
for ( VariantContext vc : c ) add(vc,current);
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
variants.clear();
|
||||
discarded = 0;
|
||||
}
|
||||
|
||||
public int discarded() { return discarded; }
|
||||
|
||||
public int size() { return variants.size(); }
|
||||
|
||||
public Iterator<VariantContext> iterator() { return variants.iterator(); }
|
||||
}
|
||||
}
|
||||
|
|
@ -34,7 +34,7 @@ import java.util.List;
|
|||
*/
|
||||
@Requires(value={DataSource.REFERENCE}, referenceMetaData={@RMD(name="ProbeIntervals",type=TableFeature.class),
|
||||
@RMD(name="ValidateAlleles",type=VariantContext.class),@RMD(name="MaskAlleles",type=VariantContext.class)})
|
||||
public class PickSequenomProbes2 extends RodWalker<Integer,Integer> {
|
||||
public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||
|
||||
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
||||
boolean lowerCaseSNPs = false;
|
||||
|
|
@ -45,6 +45,9 @@ public class PickSequenomProbes2 extends RodWalker<Integer,Integer> {
|
|||
@Argument(doc="Monomorphic sites in the mask file will be treated as filtered",fullName="filterMonomorphic",required=false)
|
||||
boolean filterMonomorphic = false;
|
||||
|
||||
@Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false)
|
||||
boolean doNotUseBWA = false;
|
||||
|
||||
GenomeLoc prevInterval;
|
||||
GenomeLoc allelePos;
|
||||
String probeName;
|
||||
|
|
@ -67,16 +70,18 @@ public class PickSequenomProbes2 extends RodWalker<Integer,Integer> {
|
|||
private SAMFileHeader header = null;
|
||||
|
||||
public void initialize() {
|
||||
if(targetReferenceFile == null)
|
||||
targetReferenceFile = getToolkit().getArguments().referenceFile;
|
||||
BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath());
|
||||
BWAConfiguration configuration = new BWAConfiguration();
|
||||
aligner = new BWACAligner(bwtFiles,configuration);
|
||||
header = new SAMFileHeader();
|
||||
SAMSequenceDictionary referenceDictionary =
|
||||
ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary();
|
||||
header.setSequenceDictionary(referenceDictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
if ( ! doNotUseBWA ) {
|
||||
if(targetReferenceFile == null)
|
||||
targetReferenceFile = getToolkit().getArguments().referenceFile;
|
||||
BWTFiles bwtFiles = new BWTFiles(targetReferenceFile.getAbsolutePath());
|
||||
BWAConfiguration configuration = new BWAConfiguration();
|
||||
aligner = new BWACAligner(bwtFiles,configuration);
|
||||
header = new SAMFileHeader();
|
||||
SAMSequenceDictionary referenceDictionary =
|
||||
ReferenceSequenceFileFactory.getReferenceSequenceFile(targetReferenceFile).getSequenceDictionary();
|
||||
header.setSequenceDictionary(referenceDictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
@ -106,8 +111,11 @@ public class PickSequenomProbes2 extends RodWalker<Integer,Integer> {
|
|||
// there was a previous interval
|
||||
validateSequence(); // ensure the sequence in the region is valid
|
||||
// next line removed in favor of the one after
|
||||
//lowerRepeats(); // change repeats in sequence to lower case
|
||||
lowerNonUniqueSegments();
|
||||
if ( doNotUseBWA ) {
|
||||
lowerRepeats(); // change repeats in sequence to lower case
|
||||
} else {
|
||||
lowerNonUniqueSegments();
|
||||
}
|
||||
print(); // print out the fasta sequence
|
||||
}
|
||||
|
||||
|
|
@ -218,7 +226,11 @@ public class PickSequenomProbes2 extends RodWalker<Integer,Integer> {
|
|||
|
||||
public void onTraversalDone(Integer fin ) {
|
||||
validateSequence();
|
||||
lowerNonUniqueSegments();
|
||||
if ( doNotUseBWA ) {
|
||||
lowerRepeats();
|
||||
} else {
|
||||
lowerNonUniqueSegments();
|
||||
}
|
||||
print();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Integration tests for the aligner.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignerIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testBasicAlignment() {
|
||||
String md5 = "34eb4323742999d6d250a0aaa803c6d5";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + GATKDataLocation + "bwa/human_b36_both.fasta" +
|
||||
" -T Align" +
|
||||
" -I " + validationDataLocation + "NA12878_Pilot1_20.trimmed.unmapped.bam" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testBasicAlignment", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class PickSequenomProbesIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testProbes() {
|
||||
String testVCF = validationDataLocation + "complexExample.vcf4";
|
||||
String testArgs = "-R " + b36KGReference + " -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B:input,VCF "+testVCF+" -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("6b5409cc78960f1be855536ed89ea9dd"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProbesUsingDbSNPMask() {
|
||||
|
||||
String md5 = "46d53491af1d3aa0ee1f1e13d68b732d";
|
||||
String testVCF = validationDataLocation + "pickSeqIntegrationTest.vcf";
|
||||
|
||||
String testArgs = "-snp_mask " + validationDataLocation + "pickSeqIntegrationTest.bed -R "
|
||||
+ b36KGReference + " -omitWindow -nameConvention "
|
||||
+ "-project_id 1kgp3_s4_lf -T PickSequenomProbes -B:input,VCF "+testVCF+" -o %s";
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5));
|
||||
executeTest("Test probes", spec1);
|
||||
|
||||
testArgs += " -nmw 1";
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(testArgs, 1, Arrays.asList(md5));
|
||||
executeTest("Test probes", spec2);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.validation;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: 7/19/11
|
||||
* Time: 7:39 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testWikiExample() {
|
||||
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
|
||||
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
|
||||
testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("27f9450afa132888a8994167f0035fd7"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWikiExampleNoBWA() {
|
||||
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
|
||||
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
|
||||
testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWikiExampleMonoFilter() {
|
||||
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
|
||||
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = validationDataLocation + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s";
|
||||
testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue