Single alignment generator, another checkpoint. Does generate single alignments, but some of the data still
needs to plumbed through and it may leak memory. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2095 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a972b2769f
commit
a95302fe98
|
|
@ -38,8 +38,7 @@ BWA::~BWA() {
|
|||
|
||||
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence();
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
bwa_seq_t* sequence = create_sequence(bases, read_length);
|
||||
|
||||
// Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
|
||||
// This method will destroy the contents of seq and rseq.
|
||||
|
|
@ -57,6 +56,26 @@ void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*&
|
|||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
||||
Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
|
||||
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||
|
||||
// Calculate paths.
|
||||
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||
|
||||
// bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
|
||||
copy_bases_into_sequence(sequence,bases,read_length);
|
||||
|
||||
// Pick best alignment and propagate its information into the sequence.
|
||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||
|
||||
// Generate the best alignment from the sequence.
|
||||
Alignment alignment = generate_final_alignment_from_sequence(sequence);
|
||||
|
||||
bwa_free_read_seq(1,sequence);
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
void BWA::generate_alignments_from_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t* paths,
|
||||
|
|
@ -66,8 +85,7 @@ void BWA::generate_alignments_from_paths(const char* bases,
|
|||
Alignment*& alignments,
|
||||
unsigned& num_alignments)
|
||||
{
|
||||
bwa_seq_t* sequence = create_sequence();
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||
|
||||
sequence->aln = paths;
|
||||
sequence->n_aln = num_paths;
|
||||
|
|
@ -108,35 +126,8 @@ void BWA::generate_alignments_from_paths(const char* bases,
|
|||
if(alignment_idx > 0)
|
||||
seq_reverse(sequence->len, sequence->seq, 0);
|
||||
|
||||
// Calculate the local coordinate and local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
|
||||
// Populate basic path info
|
||||
alignment.num_mismatches = sequence->n_mm;
|
||||
alignment.num_gap_opens = sequence->n_gapo;
|
||||
alignment.num_gap_extensions = sequence->n_gape;
|
||||
alignment.num_best = sequence->c1;
|
||||
alignment.num_second_best = sequence->c2;
|
||||
|
||||
alignment.type = sequence->type;
|
||||
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence->strand;
|
||||
alignment.mapping_quality = sequence->mapQ;
|
||||
|
||||
alignment.cigar = NULL;
|
||||
if(sequence->cigar) {
|
||||
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||
}
|
||||
alignment.n_cigar = sequence->n_cigar;
|
||||
|
||||
delete[] sequence->md;
|
||||
sequence->md = NULL;
|
||||
*(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence);
|
||||
|
||||
alignment_idx++;
|
||||
}
|
||||
|
|
@ -148,6 +139,42 @@ void BWA::generate_alignments_from_paths(const char* bases,
|
|||
bwa_free_read_seq(1,sequence);
|
||||
}
|
||||
|
||||
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
|
||||
// Calculate the local coordinate and local alignment.
|
||||
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
|
||||
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
|
||||
|
||||
// Copy the local alignment data into the alignment object.
|
||||
Alignment alignment;
|
||||
|
||||
// Populate basic path info
|
||||
alignment.num_mismatches = sequence->n_mm;
|
||||
alignment.num_gap_opens = sequence->n_gapo;
|
||||
alignment.num_gap_extensions = sequence->n_gape;
|
||||
alignment.num_best = sequence->c1;
|
||||
alignment.num_second_best = sequence->c2;
|
||||
|
||||
// Final alignment position.
|
||||
alignment.type = sequence->type;
|
||||
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
|
||||
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
|
||||
alignment.negative_strand = sequence->strand;
|
||||
alignment.mapping_quality = sequence->mapQ;
|
||||
|
||||
// Cigar step.
|
||||
alignment.cigar = NULL;
|
||||
if(sequence->cigar) {
|
||||
alignment.cigar = new uint16_t[sequence->n_cigar];
|
||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||
}
|
||||
alignment.n_cigar = sequence->n_cigar;
|
||||
|
||||
delete[] sequence->md;
|
||||
sequence->md = NULL;
|
||||
|
||||
return alignment;
|
||||
}
|
||||
|
||||
void BWA::load_default_options()
|
||||
{
|
||||
options.s_mm = 3;
|
||||
|
|
@ -190,7 +217,7 @@ void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
|
|||
* Create a sequence with a set of reasonable initial defaults.
|
||||
* Will leave seq and rseq empty.
|
||||
*/
|
||||
bwa_seq_t* BWA::create_sequence()
|
||||
bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length)
|
||||
{
|
||||
bwa_seq_t* sequence = new bwa_seq_t;
|
||||
|
||||
|
|
@ -198,8 +225,8 @@ bwa_seq_t* BWA::create_sequence()
|
|||
|
||||
sequence->name = 0;
|
||||
|
||||
sequence->seq = NULL;
|
||||
sequence->rseq = NULL;
|
||||
copy_bases_into_sequence(sequence, bases, read_length);
|
||||
|
||||
sequence->qual = 0;
|
||||
sequence->aln = 0;
|
||||
sequence->md = 0;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,23 @@
|
|||
#include "bwt.h"
|
||||
#include "bwtaln.h"
|
||||
|
||||
class Alignment;
|
||||
class Alignment {
|
||||
public:
|
||||
uint32_t type;
|
||||
int contig;
|
||||
bwtint_t pos;
|
||||
bool negative_strand;
|
||||
uint32_t mapping_quality;
|
||||
|
||||
uint16_t *cigar;
|
||||
int n_cigar;
|
||||
|
||||
uint8_t num_mismatches;
|
||||
uint8_t num_gap_opens;
|
||||
uint8_t num_gap_extensions;
|
||||
uint32_t num_best;
|
||||
uint32_t num_second_best;
|
||||
};
|
||||
|
||||
class BWA {
|
||||
private:
|
||||
|
|
@ -17,8 +33,9 @@ class BWA {
|
|||
gap_opt_t options;
|
||||
|
||||
void load_default_options();
|
||||
bwa_seq_t* create_sequence();
|
||||
bwa_seq_t* create_sequence(const char* bases, const unsigned read_length);
|
||||
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
|
||||
Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence);
|
||||
|
||||
public:
|
||||
BWA(const char* ann_filename,
|
||||
|
|
@ -40,6 +57,8 @@ class BWA {
|
|||
void set_gap_extension_penalty(int penalty);
|
||||
|
||||
// Perform the alignment
|
||||
Alignment generate_single_alignment(const char* bases,
|
||||
const unsigned read_length);
|
||||
void find_paths(const char* bases,
|
||||
const unsigned read_length,
|
||||
bwt_aln1_t*& paths,
|
||||
|
|
@ -56,22 +75,4 @@ class BWA {
|
|||
unsigned& num_alignments);
|
||||
};
|
||||
|
||||
class Alignment {
|
||||
public:
|
||||
uint32_t type;
|
||||
int contig;
|
||||
bwtint_t pos;
|
||||
bool negative_strand;
|
||||
uint32_t mapping_quality;
|
||||
|
||||
uint16_t *cigar;
|
||||
int n_cigar;
|
||||
|
||||
uint8_t num_mismatches;
|
||||
uint8_t num_gap_opens;
|
||||
uint8_t num_gap_extensions;
|
||||
uint32_t num_best;
|
||||
uint32_t num_second_best;
|
||||
};
|
||||
|
||||
#endif // BWA_GATEWAY
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
typedef void (BWA::*int_setter)(int value);
|
||||
typedef void (BWA::*float_setter)(float value);
|
||||
|
||||
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name);
|
||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||
|
|
@ -228,66 +229,10 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
|
||||
for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) {
|
||||
Alignment& alignment = *(alignments + alignment_idx);
|
||||
|
||||
unsigned cigar_length;
|
||||
if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
|
||||
else if(!alignment.cigar) cigar_length = 1;
|
||||
else cigar_length = alignment.n_cigar;
|
||||
|
||||
jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
|
||||
if(java_cigar_operators == NULL) return NULL;
|
||||
jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
|
||||
if(java_cigar_lengths == NULL) return NULL;
|
||||
|
||||
if(alignment.cigar) {
|
||||
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
if(java_alignment_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IIIIII)V");
|
||||
if(java_alignment_constructor == NULL) return NULL;
|
||||
|
||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapping_quality,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths,
|
||||
alignment.num_mismatches,
|
||||
alignment.num_gap_opens,
|
||||
alignment.num_gap_extensions,
|
||||
alignment.num_best,
|
||||
alignment.num_second_best);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
|
||||
delete[] alignment.cigar;
|
||||
|
||||
jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
env->DeleteLocalRef(java_alignment_class);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
|
||||
delete[] alignments;
|
||||
|
|
@ -298,6 +243,84 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
return env->ExceptionCheck() ? NULL : java_alignments;
|
||||
}
|
||||
|
||||
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) {
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||
if(read_bases == NULL) return NULL;
|
||||
|
||||
Alignment best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
|
||||
jobject java_best_alignment = convert_to_java_alignment(env,read_bases,read_length,best_alignment);
|
||||
|
||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||
|
||||
return java_best_alignment;
|
||||
}
|
||||
|
||||
static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) {
|
||||
unsigned cigar_length;
|
||||
if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0;
|
||||
else if(!alignment.cigar) cigar_length = 1;
|
||||
else cigar_length = alignment.n_cigar;
|
||||
|
||||
jcharArray java_cigar_operators = env->NewCharArray(cigar_length);
|
||||
if(java_cigar_operators == NULL) return NULL;
|
||||
jintArray java_cigar_lengths = env->NewIntArray(cigar_length);
|
||||
if(java_cigar_lengths == NULL) return NULL;
|
||||
|
||||
if(alignment.cigar) {
|
||||
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
|
||||
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
|
||||
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
|
||||
|
||||
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(alignment.type != BWA_TYPE_NO_MATCH) {
|
||||
jchar cigar_operator = 'M';
|
||||
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||
if(java_alignment_class == NULL) return NULL;
|
||||
|
||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IIIIII)V");
|
||||
if(java_alignment_constructor == NULL) return NULL;
|
||||
|
||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||
java_alignment_constructor,
|
||||
alignment.contig,
|
||||
alignment.pos,
|
||||
alignment.negative_strand,
|
||||
alignment.mapping_quality,
|
||||
java_cigar_operators,
|
||||
java_cigar_lengths,
|
||||
alignment.num_mismatches,
|
||||
alignment.num_gap_opens,
|
||||
alignment.num_gap_extensions,
|
||||
alignment.num_best,
|
||||
alignment.num_second_best);
|
||||
if(java_alignment == NULL) return NULL;
|
||||
|
||||
delete[] alignment.cigar;
|
||||
|
||||
env->DeleteLocalRef(java_alignment_class);
|
||||
if(env->ExceptionCheck()) return NULL;
|
||||
|
||||
return java_alignment;
|
||||
}
|
||||
|
||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||
jclass configuration_class = env->GetObjectClass(configuration);
|
||||
if(configuration_class == NULL) return NULL;
|
||||
|
|
|
|||
|
|
@ -39,6 +39,14 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
|
|||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
|
||||
(JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
|
||||
* Method: getBestAlignment
|
||||
* Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
|
||||
//SortedMap<Integer,Integer> alignmentCounts = new TreeMap<Integer,Integer>();
|
||||
|
||||
/**
|
||||
* The instance used to generate alignments.
|
||||
*/
|
||||
|
|
@ -55,7 +53,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
@Override
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return true;
|
||||
//return read.getReadName().contains("SL-XBC:1:13:1296:1410#0");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -72,16 +69,7 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
boolean matches = true;
|
||||
Iterator<Alignment[]> alignments = aligner.getAlignments(bases);
|
||||
/*
|
||||
BWAPath[] paths = aligner.getPaths(bases);
|
||||
int bestCount = 0;
|
||||
if(paths.length > 0) bestCount = paths[0].bestCount;
|
||||
if(alignmentCounts.containsKey(bestCount))
|
||||
alignmentCounts.put(bestCount,alignmentCounts.get(bestCount)+1);
|
||||
else
|
||||
alignmentCounts.put(bestCount,1);
|
||||
*/
|
||||
Iterator<Alignment[]> alignments = aligner.getAllAlignments(bases);
|
||||
|
||||
if(!alignments.hasNext()) {
|
||||
matches = read.getReadUnmappedFlag();
|
||||
|
|
@ -106,14 +94,17 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
|
||||
logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
|
||||
logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
|
||||
Alignment[] allAlignments = aligner.getAllAlignments(bases);
|
||||
for(int i = 0; i < allAlignments.length; i++) {
|
||||
logger.error(String.format("Alignment %d:",i));
|
||||
logger.error(String.format(" Contig index: %d",allAlignments[i].getContigIndex()));
|
||||
logger.error(String.format(" Alignment start: %d", allAlignments[i].getAlignmentStart()));
|
||||
logger.error(String.format(" Negative strand: %b", allAlignments[i].isNegativeStrand()));
|
||||
logger.error(String.format(" Cigar: %s", allAlignments[i].getCigarString()));
|
||||
logger.error(String.format(" Mapping quality: %s%n", allAlignments[i].getMappingQuality()));
|
||||
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(bases);
|
||||
while(alignmentIterator.hasNext()) {
|
||||
Alignment[] alignmentsByScore = alignmentIterator.next();
|
||||
for(int i = 0; i < alignmentsByScore.length; i++) {
|
||||
logger.error(String.format("Alignment %d:",i));
|
||||
logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex()));
|
||||
logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart()));
|
||||
logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand()));
|
||||
logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString()));
|
||||
logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality()));
|
||||
}
|
||||
}
|
||||
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
|
||||
}
|
||||
|
|
@ -122,8 +113,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
logger.info(String.format("Processed %d reads", count));
|
||||
}
|
||||
|
||||
//logger.info(String.format("validated read %s", read.getReadName()));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -153,18 +142,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
|
||||
/*
|
||||
try {
|
||||
PrintWriter output = new PrintWriter("GATK_distribution.out");
|
||||
for(SortedMap.Entry<Integer,Integer> results: alignmentCounts.entrySet())
|
||||
output.printf("%d\t%d%n", results.getKey(), results.getValue());
|
||||
output.close();
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new StingException("Unable to open output file");
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.Iterator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
|
||||
/**
|
||||
* Align reads to the reference specified by BWTPrefix.
|
||||
|
|
@ -15,19 +16,26 @@ import java.util.Iterator;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@WalkerName("Align")
|
||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
|
||||
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
||||
String outputBamFile = null;
|
||||
|
||||
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
||||
public Integer bamCompression = 5;
|
||||
|
||||
/**
|
||||
* The actual aligner.
|
||||
*/
|
||||
private BWACAligner aligner = null;
|
||||
|
||||
/**
|
||||
* A random number generator, used to generate alignments.
|
||||
* Target for reads to output.
|
||||
*/
|
||||
private Random random = new Random();
|
||||
private SAMFileWriter outputBam = null;
|
||||
|
||||
/**
|
||||
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||
|
|
@ -36,6 +44,11 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
|||
public void initialize() {
|
||||
BWACConfiguration configuration = new BWACConfiguration(prefix);
|
||||
aligner = new BWACAligner(configuration);
|
||||
|
||||
if ( outputBamFile != null ) {
|
||||
SAMFileHeader header = this.getToolkit().getSAMFileHeader();
|
||||
outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -46,17 +59,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
|||
*/
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
Iterator<SAMRecord[]> alignedReads = aligner.align(read);
|
||||
if(alignedReads.hasNext()) {
|
||||
SAMRecord[] bestAlignments = alignedReads.next();
|
||||
SAMRecord selectedRead = bestAlignments[random.nextInt(bestAlignments.length)];
|
||||
out.println(selectedRead.format());
|
||||
return bestAlignments.length;
|
||||
}
|
||||
else {
|
||||
out.println(read.format());
|
||||
return 0;
|
||||
SAMRecord alignedRead = aligner.align(read);
|
||||
if (outputBam != null) {
|
||||
outputBam.addAlignment(alignedRead);
|
||||
} else {
|
||||
out.println(alignedRead.format());
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* An aligner using the BWA/C implementation.
|
||||
|
|
@ -23,40 +24,57 @@ public class BWACAligner {
|
|||
*/
|
||||
private long thunkPointer = 0;
|
||||
|
||||
/**
|
||||
* Create a pointer to the BWA/C thunk.
|
||||
* @param configuration Configuration of the aligner.
|
||||
* @return Pointer to the BWA/C thunk.
|
||||
*/
|
||||
protected native long create(BWACConfiguration configuration);
|
||||
|
||||
/**
|
||||
* Destroy the BWA/C thunk.
|
||||
* @param thunkPointer Pointer to the allocated thunk.
|
||||
*/
|
||||
protected native void destroy(long thunkPointer);
|
||||
|
||||
public BWACAligner(BWACConfiguration configuration) {
|
||||
if(thunkPointer != 0)
|
||||
throw new StingException("BWA/C attempting to reinitialize.");
|
||||
|
||||
if(!new File(configuration.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(configuration.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
|
||||
thunkPointer = create(configuration);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
public void close() {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C close attempted, but BWA/C was never properly initialized.");
|
||||
throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
||||
destroy(thunkPointer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||
* @param bases Bases to align.
|
||||
* @return An align
|
||||
*/
|
||||
public Alignment getBestAlignment(final byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized.");
|
||||
return getBestAlignment(thunkPointer,bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best aligned read, chosen randomly from the pile of best alignments.
|
||||
* @param read Read to align.
|
||||
* @return Read with injected alignment data.
|
||||
*/
|
||||
public SAMRecord align(final SAMRecord read) {
|
||||
return convertAlignmentToRead(read,getBestAlignment(read.getReadBases()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a iterator of alignments, batched by mapping quality.
|
||||
* @param bases List of bases.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterator<Alignment[]> getAlignments(final byte[] bases) {
|
||||
public Iterator<Alignment[]> getAllAlignments(final byte[] bases) {
|
||||
final BWAPath[] paths = getPaths(bases);
|
||||
return new Iterator<Alignment[]>() {
|
||||
/**
|
||||
|
|
@ -95,8 +113,8 @@ public class BWACAligner {
|
|||
* @param read Read to align.
|
||||
* @return Iterator to alignments.
|
||||
*/
|
||||
public Iterator<SAMRecord[]> align(final SAMRecord read) {
|
||||
final Iterator<Alignment[]> alignments = getAlignments(read.getReadBases());
|
||||
public Iterator<SAMRecord[]> alignAll(final SAMRecord read) {
|
||||
final Iterator<Alignment[]> alignments = getAllAlignments(read.getReadBases());
|
||||
return new Iterator<SAMRecord[]>() {
|
||||
/**
|
||||
* Whether all alignments have been seen based on the current position.
|
||||
|
|
@ -124,21 +142,6 @@ public class BWACAligner {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Push all alignment data into individual SAMRecords, gaining in convenience but losing some of
|
||||
* the additional data stored in an alignment object.
|
||||
* @param read The read to align.
|
||||
* @return A list of potential alignments.
|
||||
*/
|
||||
public SAMRecord[] alignAll(SAMRecord read) {
|
||||
Alignment[] alignments = getAllAlignments(read.getReadBases());
|
||||
SAMRecord[] reads = new SAMRecord[alignments.length];
|
||||
for(int i = 0; i < alignments.length; i++)
|
||||
reads[i] = convertAlignmentToRead(read,alignments[i]);
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a read directly from an alignment.
|
||||
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
|
||||
|
|
@ -165,18 +168,6 @@ public class BWACAligner {
|
|||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* Align the given base array to the BWT. The base array should be in ASCII format.
|
||||
* @param bases ASCII representation of byte array.
|
||||
* @return an array of indices into the bwa.
|
||||
*/
|
||||
public Alignment[] getAllAlignments(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C align attempted, but BWA/C is not properly initialized.");
|
||||
BWAPath[] paths = getPaths(bases);
|
||||
return convertPathsToAlignments(thunkPointer,bases,paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the paths associated with the given base string.
|
||||
* @param bases List of bases.
|
||||
|
|
@ -189,6 +180,19 @@ public class BWACAligner {
|
|||
return paths;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a pointer to the BWA/C thunk.
|
||||
* @param configuration Configuration of the aligner.
|
||||
* @return Pointer to the BWA/C thunk.
|
||||
*/
|
||||
protected native long create(BWACConfiguration configuration);
|
||||
|
||||
/**
|
||||
* Destroy the BWA/C thunk.
|
||||
* @param thunkPointer Pointer to the allocated thunk.
|
||||
*/
|
||||
protected native void destroy(long thunkPointer);
|
||||
|
||||
/**
|
||||
* Do the extra steps involved in converting a local alignment to a global alignment.
|
||||
* @param bases ASCII representation of byte array.
|
||||
|
|
@ -218,4 +222,12 @@ public class BWACAligner {
|
|||
* @return A list of alignments.
|
||||
*/
|
||||
protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
|
||||
|
||||
/**
|
||||
* Gets the best alignment from BWA/C, randomly selected from all best-aligned reads.
|
||||
* @param thunkPointer Pointer to BWA thunk.
|
||||
* @param bases bases to align.
|
||||
* @return The best alignment from BWA/C.
|
||||
*/
|
||||
protected native Alignment getBestAlignment(long thunkPointer, byte[] bases);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue