From a95302fe9823aa443d650df7d89d72bbb0cc5940 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 19 Nov 2009 21:20:03 +0000 Subject: [PATCH] Single alignment generator, another checkpoint. Does generate single alignments, but some of the data still needs to plumbed through and it may leak memory. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2095 348d0f76-0448-11de-a6fe-93d51630548a --- c/bwa/bwa_gateway.cpp | 97 +++++++----- c/bwa/bwa_gateway.h | 41 +++--- ...tute_sting_alignment_bwa_c_BWACAligner.cpp | 139 ++++++++++-------- ...titute_sting_alignment_bwa_c_BWACAligner.h | 8 + .../alignment/AlignmentValidationWalker.java | 47 ++---- .../sting/alignment/AlignmentWalker.java | 39 +++-- .../sting/alignment/bwa/c/BWACAligner.java | 100 +++++++------ 7 files changed, 264 insertions(+), 207 deletions(-) diff --git a/c/bwa/bwa_gateway.cpp b/c/bwa/bwa_gateway.cpp index dbb3d39bd..66085b075 100644 --- a/c/bwa/bwa_gateway.cpp +++ b/c/bwa/bwa_gateway.cpp @@ -38,8 +38,7 @@ BWA::~BWA() { void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count) { - bwa_seq_t* sequence = create_sequence(); - copy_bases_into_sequence(sequence, bases, read_length); + bwa_seq_t* sequence = create_sequence(bases, read_length); // Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln). // This method will destroy the contents of seq and rseq. @@ -57,6 +56,26 @@ void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& bwa_free_read_seq(1,sequence); } +Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_length) { + bwa_seq_t* sequence = create_sequence(bases,read_length); + + // Calculate paths. + bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + + // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in. + copy_bases_into_sequence(sequence,bases,read_length); + + // Pick best alignment and propagate its information into the sequence. + bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); + + // Generate the best alignment from the sequence. + Alignment alignment = generate_final_alignment_from_sequence(sequence); + + bwa_free_read_seq(1,sequence); + + return alignment; +} + void BWA::generate_alignments_from_paths(const char* bases, const unsigned read_length, bwt_aln1_t* paths, @@ -66,8 +85,7 @@ void BWA::generate_alignments_from_paths(const char* bases, Alignment*& alignments, unsigned& num_alignments) { - bwa_seq_t* sequence = create_sequence(); - copy_bases_into_sequence(sequence, bases, read_length); + bwa_seq_t* sequence = create_sequence(bases,read_length); sequence->aln = paths; sequence->n_aln = num_paths; @@ -108,35 +126,8 @@ void BWA::generate_alignments_from_paths(const char* bases, if(alignment_idx > 0) seq_reverse(sequence->len, sequence->seq, 0); - // Calculate the local coordinate and local alignment. - bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr); - bwa_refine_gapped(bns, 1, sequence, reference, NULL); - // Copy the local alignment data into the alignment object. - Alignment& alignment = *(alignments + alignment_idx); - - // Populate basic path info - alignment.num_mismatches = sequence->n_mm; - alignment.num_gap_opens = sequence->n_gapo; - alignment.num_gap_extensions = sequence->n_gape; - alignment.num_best = sequence->c1; - alignment.num_second_best = sequence->c2; - - alignment.type = sequence->type; - bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig); - alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1; - alignment.negative_strand = sequence->strand; - alignment.mapping_quality = sequence->mapQ; - - alignment.cigar = NULL; - if(sequence->cigar) { - alignment.cigar = new uint16_t[sequence->n_cigar]; - memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); - } - alignment.n_cigar = sequence->n_cigar; - - delete[] sequence->md; - sequence->md = NULL; + *(alignments + alignment_idx) = generate_final_alignment_from_sequence(sequence); alignment_idx++; } @@ -148,6 +139,42 @@ void BWA::generate_alignments_from_paths(const char* bases, bwa_free_read_seq(1,sequence); } +Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { + // Calculate the local coordinate and local alignment. + bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr); + bwa_refine_gapped(bns, 1, sequence, reference, NULL); + + // Copy the local alignment data into the alignment object. + Alignment alignment; + + // Populate basic path info + alignment.num_mismatches = sequence->n_mm; + alignment.num_gap_opens = sequence->n_gapo; + alignment.num_gap_extensions = sequence->n_gape; + alignment.num_best = sequence->c1; + alignment.num_second_best = sequence->c2; + + // Final alignment position. + alignment.type = sequence->type; + bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig); + alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1; + alignment.negative_strand = sequence->strand; + alignment.mapping_quality = sequence->mapQ; + + // Cigar step. + alignment.cigar = NULL; + if(sequence->cigar) { + alignment.cigar = new uint16_t[sequence->n_cigar]; + memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); + } + alignment.n_cigar = sequence->n_cigar; + + delete[] sequence->md; + sequence->md = NULL; + + return alignment; +} + void BWA::load_default_options() { options.s_mm = 3; @@ -190,7 +217,7 @@ void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } * Create a sequence with a set of reasonable initial defaults. * Will leave seq and rseq empty. */ -bwa_seq_t* BWA::create_sequence() +bwa_seq_t* BWA::create_sequence(const char* bases, const unsigned read_length) { bwa_seq_t* sequence = new bwa_seq_t; @@ -198,8 +225,8 @@ bwa_seq_t* BWA::create_sequence() sequence->name = 0; - sequence->seq = NULL; - sequence->rseq = NULL; + copy_bases_into_sequence(sequence, bases, read_length); + sequence->qual = 0; sequence->aln = 0; sequence->md = 0; diff --git a/c/bwa/bwa_gateway.h b/c/bwa/bwa_gateway.h index a794f7958..c74093f4c 100644 --- a/c/bwa/bwa_gateway.h +++ b/c/bwa/bwa_gateway.h @@ -7,7 +7,23 @@ #include "bwt.h" #include "bwtaln.h" -class Alignment; +class Alignment { + public: + uint32_t type; + int contig; + bwtint_t pos; + bool negative_strand; + uint32_t mapping_quality; + + uint16_t *cigar; + int n_cigar; + + uint8_t num_mismatches; + uint8_t num_gap_opens; + uint8_t num_gap_extensions; + uint32_t num_best; + uint32_t num_second_best; +}; class BWA { private: @@ -17,8 +33,9 @@ class BWA { gap_opt_t options; void load_default_options(); - bwa_seq_t* create_sequence(); + bwa_seq_t* create_sequence(const char* bases, const unsigned read_length); void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length); + Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence); public: BWA(const char* ann_filename, @@ -40,6 +57,8 @@ class BWA { void set_gap_extension_penalty(int penalty); // Perform the alignment + Alignment generate_single_alignment(const char* bases, + const unsigned read_length); void find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, @@ -56,22 +75,4 @@ class BWA { unsigned& num_alignments); }; -class Alignment { - public: - uint32_t type; - int contig; - bwtint_t pos; - bool negative_strand; - uint32_t mapping_quality; - - uint16_t *cigar; - int n_cigar; - - uint8_t num_mismatches; - uint8_t num_gap_opens; - uint8_t num_gap_extensions; - uint32_t num_best; - uint32_t num_second_best; -}; - #endif // BWA_GATEWAY diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp index 7db61bf09..60a268dc8 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -11,6 +11,7 @@ typedef void (BWA::*int_setter)(int value); typedef void (BWA::*float_setter)(float value); +static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name); static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); @@ -228,66 +229,10 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA for(unsigned alignment_idx = 0; alignment_idx < (unsigned)num_alignments; alignment_idx++) { Alignment& alignment = *(alignments + alignment_idx); - - unsigned cigar_length; - if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; - else if(!alignment.cigar) cigar_length = 1; - else cigar_length = alignment.n_cigar; - - jcharArray java_cigar_operators = env->NewCharArray(cigar_length); - if(java_cigar_operators == NULL) return NULL; - jintArray java_cigar_lengths = env->NewIntArray(cigar_length); - if(java_cigar_lengths == NULL) return NULL; - - if(alignment.cigar) { - for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { - jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; - jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; - - env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); - if(env->ExceptionCheck()) return NULL; - env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); - if(env->ExceptionCheck()) return NULL; - } - } - else { - if(alignment.type != BWA_TYPE_NO_MATCH) { - jchar cigar_operator = 'M'; - env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); - if(env->ExceptionCheck()) return NULL; - env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); - if(env->ExceptionCheck()) return NULL; - } - } - - jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); - if(java_alignment_class == NULL) return NULL; - - jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IIIIII)V"); - if(java_alignment_constructor == NULL) return NULL; - - jobject java_alignment = env->NewObject(java_alignment_class, - java_alignment_constructor, - alignment.contig, - alignment.pos, - alignment.negative_strand, - alignment.mapping_quality, - java_cigar_operators, - java_cigar_lengths, - alignment.num_mismatches, - alignment.num_gap_opens, - alignment.num_gap_extensions, - alignment.num_best, - alignment.num_second_best); - if(java_alignment == NULL) return NULL; - - delete[] alignment.cigar; - + jobject java_alignment = convert_to_java_alignment(env,read_bases,read_length,alignment); + if(java_alignment == NULL) return NULL; env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment); if(env->ExceptionCheck()) return NULL; - - env->DeleteLocalRef(java_alignment_class); - if(env->ExceptionCheck()) return NULL; } delete[] alignments; @@ -298,6 +243,84 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA return env->ExceptionCheck() ? NULL : java_alignments; } +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { + BWA* bwa = (BWA*)java_bwa; + + const jsize read_length = env->GetArrayLength(java_bases); + if(env->ExceptionCheck()) return NULL; + + jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); + if(read_bases == NULL) return NULL; + + Alignment best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); + jobject java_best_alignment = convert_to_java_alignment(env,read_bases,read_length,best_alignment); + + env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); + + return java_best_alignment; +} + +static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment) { + unsigned cigar_length; + if(alignment.type == BWA_TYPE_NO_MATCH) cigar_length = 0; + else if(!alignment.cigar) cigar_length = 1; + else cigar_length = alignment.n_cigar; + + jcharArray java_cigar_operators = env->NewCharArray(cigar_length); + if(java_cigar_operators == NULL) return NULL; + jintArray java_cigar_lengths = env->NewIntArray(cigar_length); + if(java_cigar_lengths == NULL) return NULL; + + if(alignment.cigar) { + for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) { + jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14]; + jint cigar_length = alignment.cigar[cigar_idx]&0x3fff; + + env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length); + if(env->ExceptionCheck()) return NULL; + } + } + else { + if(alignment.type != BWA_TYPE_NO_MATCH) { + jchar cigar_operator = 'M'; + env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator); + if(env->ExceptionCheck()) return NULL; + env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length); + if(env->ExceptionCheck()) return NULL; + } + } + + jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); + if(java_alignment_class == NULL) return NULL; + + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IIIIII)V"); + if(java_alignment_constructor == NULL) return NULL; + + jobject java_alignment = env->NewObject(java_alignment_class, + java_alignment_constructor, + alignment.contig, + alignment.pos, + alignment.negative_strand, + alignment.mapping_quality, + java_cigar_operators, + java_cigar_lengths, + alignment.num_mismatches, + alignment.num_gap_opens, + alignment.num_gap_extensions, + alignment.num_best, + alignment.num_second_best); + if(java_alignment == NULL) return NULL; + + delete[] alignment.cigar; + + env->DeleteLocalRef(java_alignment_class); + if(env->ExceptionCheck()) return NULL; + + return java_alignment; +} + static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name) { jclass configuration_class = env->GetObjectClass(configuration); if(configuration_class == NULL) return NULL; diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h index afadb2493..5e1b3ab2b 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h @@ -39,6 +39,14 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments (JNIEnv *, jobject, jlong, jbyteArray, jobjectArray); +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: getBestAlignment + * Signature: (J[B)Lorg/broadinstitute/sting/alignment/Alignment; + */ +JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getBestAlignment + (JNIEnv *, jobject, jlong, jbyteArray); + #ifdef __cplusplus } #endif diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java index 8b0ba978e..10dfca0ec 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -28,8 +28,6 @@ public class AlignmentValidationWalker extends ReadWalker { @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; - //SortedMap alignmentCounts = new TreeMap(); - /** * The instance used to generate alignments. */ @@ -55,7 +53,6 @@ public class AlignmentValidationWalker extends ReadWalker { @Override public boolean filter(char[] ref, SAMRecord read) { return true; - //return read.getReadName().contains("SL-XBC:1:13:1296:1410#0"); } /** @@ -72,16 +69,7 @@ public class AlignmentValidationWalker extends ReadWalker { if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); boolean matches = true; - Iterator alignments = aligner.getAlignments(bases); - /* - BWAPath[] paths = aligner.getPaths(bases); - int bestCount = 0; - if(paths.length > 0) bestCount = paths[0].bestCount; - if(alignmentCounts.containsKey(bestCount)) - alignmentCounts.put(bestCount,alignmentCounts.get(bestCount)+1); - else - alignmentCounts.put(bestCount,1); - */ + Iterator alignments = aligner.getAllAlignments(bases); if(!alignments.hasNext()) { matches = read.getReadUnmappedFlag(); @@ -106,14 +94,17 @@ public class AlignmentValidationWalker extends ReadWalker { logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); logger.error(String.format(" Cigar: %s%n", read.getCigarString())); logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); - Alignment[] allAlignments = aligner.getAllAlignments(bases); - for(int i = 0; i < allAlignments.length; i++) { - logger.error(String.format("Alignment %d:",i)); - logger.error(String.format(" Contig index: %d",allAlignments[i].getContigIndex())); - logger.error(String.format(" Alignment start: %d", allAlignments[i].getAlignmentStart())); - logger.error(String.format(" Negative strand: %b", allAlignments[i].isNegativeStrand())); - logger.error(String.format(" Cigar: %s", allAlignments[i].getCigarString())); - logger.error(String.format(" Mapping quality: %s%n", allAlignments[i].getMappingQuality())); + Iterator alignmentIterator = aligner.getAllAlignments(bases); + while(alignmentIterator.hasNext()) { + Alignment[] alignmentsByScore = alignmentIterator.next(); + for(int i = 0; i < alignmentsByScore.length; i++) { + logger.error(String.format("Alignment %d:",i)); + logger.error(String.format(" Contig index: %d",alignmentsByScore[i].getContigIndex())); + logger.error(String.format(" Alignment start: %d", alignmentsByScore[i].getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", alignmentsByScore[i].isNegativeStrand())); + logger.error(String.format(" Cigar: %s", alignmentsByScore[i].getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", alignmentsByScore[i].getMappingQuality())); + } } throw new StingException(String.format("Read %s mismatches!", read.getReadName())); } @@ -122,8 +113,6 @@ public class AlignmentValidationWalker extends ReadWalker { logger.info(String.format("Processed %d reads", count)); } - //logger.info(String.format("validated read %s", read.getReadName())); - return 1; } @@ -153,18 +142,6 @@ public class AlignmentValidationWalker extends ReadWalker { public void onTraversalDone(Integer result) { aligner.close(); super.onTraversalDone(result); - -/* - try { - PrintWriter output = new PrintWriter("GATK_distribution.out"); - for(SortedMap.Entry results: alignmentCounts.entrySet()) - output.printf("%d\t%d%n", results.getKey(), results.getValue()); - output.close(); - } - catch(FileNotFoundException ex) { - throw new StingException("Unable to open output file"); - } -*/ } } diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index a2f1cbad5..e2c11d9bb 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -1,13 +1,14 @@ package org.broadinstitute.sting.alignment; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; import net.sf.samtools.SAMRecord; - -import java.util.Random; -import java.util.Iterator; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileWriter; /** * Align reads to the reference specified by BWTPrefix. @@ -15,19 +16,26 @@ import java.util.Iterator; * @author mhanna * @version 0.1 */ +@WalkerName("Align") public class AlignmentWalker extends ReadWalker { @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; + @Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false) + String outputBamFile = null; + + @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) + public Integer bamCompression = 5; + /** * The actual aligner. */ private BWACAligner aligner = null; /** - * A random number generator, used to generate alignments. + * Target for reads to output. */ - private Random random = new Random(); + private SAMFileWriter outputBam = null; /** * Create an aligner object. The aligner object will load and hold the BWT until close() is called. @@ -36,6 +44,11 @@ public class AlignmentWalker extends ReadWalker { public void initialize() { BWACConfiguration configuration = new BWACConfiguration(prefix); aligner = new BWACAligner(configuration); + + if ( outputBamFile != null ) { + SAMFileHeader header = this.getToolkit().getSAMFileHeader(); + outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression); + } } /** @@ -46,17 +59,13 @@ public class AlignmentWalker extends ReadWalker { */ @Override public Integer map(char[] ref, SAMRecord read) { - Iterator alignedReads = aligner.align(read); - if(alignedReads.hasNext()) { - SAMRecord[] bestAlignments = alignedReads.next(); - SAMRecord selectedRead = bestAlignments[random.nextInt(bestAlignments.length)]; - out.println(selectedRead.format()); - return bestAlignments.length; - } - else { - out.println(read.format()); - return 0; + SAMRecord alignedRead = aligner.align(read); + if (outputBam != null) { + outputBam.addAlignment(alignedRead); + } else { + out.println(alignedRead.format()); } + return 1; } /** diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java index c8c2bc6b9..ded249e87 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.alignment.Alignment; import java.util.*; +import java.io.File; /** * An aligner using the BWA/C implementation. @@ -23,40 +24,57 @@ public class BWACAligner { */ private long thunkPointer = 0; - /** - * Create a pointer to the BWA/C thunk. - * @param configuration Configuration of the aligner. - * @return Pointer to the BWA/C thunk. - */ - protected native long create(BWACConfiguration configuration); - - /** - * Destroy the BWA/C thunk. - * @param thunkPointer Pointer to the allocated thunk. - */ - protected native void destroy(long thunkPointer); - public BWACAligner(BWACConfiguration configuration) { if(thunkPointer != 0) throw new StingException("BWA/C attempting to reinitialize."); + + if(!new File(configuration.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(configuration.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); + thunkPointer = create(configuration); } + /** * Close this instance of the BWA pointer and delete its resources. */ public void close() { if(thunkPointer == 0) - throw new StingException("BWA/C close attempted, but BWA/C was never properly initialized."); + throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized."); destroy(thunkPointer); } + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases) { + if(thunkPointer == 0) + throw new StingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); + return getBestAlignment(thunkPointer,bases); + } + + /** + * Get the best aligned read, chosen randomly from the pile of best alignments. + * @param read Read to align. + * @return Read with injected alignment data. + */ + public SAMRecord align(final SAMRecord read) { + return convertAlignmentToRead(read,getBestAlignment(read.getReadBases())); + } + /** * Get a iterator of alignments, batched by mapping quality. * @param bases List of bases. * @return Iterator to alignments. */ - public Iterator getAlignments(final byte[] bases) { + public Iterator getAllAlignments(final byte[] bases) { final BWAPath[] paths = getPaths(bases); return new Iterator() { /** @@ -95,8 +113,8 @@ public class BWACAligner { * @param read Read to align. * @return Iterator to alignments. */ - public Iterator align(final SAMRecord read) { - final Iterator alignments = getAlignments(read.getReadBases()); + public Iterator alignAll(final SAMRecord read) { + final Iterator alignments = getAllAlignments(read.getReadBases()); return new Iterator() { /** * Whether all alignments have been seen based on the current position. @@ -124,21 +142,6 @@ public class BWACAligner { }; } - /** - * Push all alignment data into individual SAMRecords, gaining in convenience but losing some of - * the additional data stored in an alignment object. - * @param read The read to align. - * @return A list of potential alignments. - */ - public SAMRecord[] alignAll(SAMRecord read) { - Alignment[] alignments = getAllAlignments(read.getReadBases()); - SAMRecord[] reads = new SAMRecord[alignments.length]; - for(int i = 0; i < alignments.length; i++) - reads[i] = convertAlignmentToRead(read,alignments[i]); - - return reads; - } - /** * Creates a read directly from an alignment. * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. @@ -165,18 +168,6 @@ public class BWACAligner { return read; } - /** - * Align the given base array to the BWT. The base array should be in ASCII format. - * @param bases ASCII representation of byte array. - * @return an array of indices into the bwa. - */ - public Alignment[] getAllAlignments(byte[] bases) { - if(thunkPointer == 0) - throw new StingException("BWA/C align attempted, but BWA/C is not properly initialized."); - BWAPath[] paths = getPaths(bases); - return convertPathsToAlignments(thunkPointer,bases,paths); - } - /** * Get the paths associated with the given base string. * @param bases List of bases. @@ -189,6 +180,19 @@ public class BWACAligner { return paths; } + /** + * Create a pointer to the BWA/C thunk. + * @param configuration Configuration of the aligner. + * @return Pointer to the BWA/C thunk. + */ + protected native long create(BWACConfiguration configuration); + + /** + * Destroy the BWA/C thunk. + * @param thunkPointer Pointer to the allocated thunk. + */ + protected native void destroy(long thunkPointer); + /** * Do the extra steps involved in converting a local alignment to a global alignment. * @param bases ASCII representation of byte array. @@ -218,4 +222,12 @@ public class BWACAligner { * @return A list of alignments. */ protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths); + + /** + * Gets the best alignment from BWA/C, randomly selected from all best-aligned reads. + * @param thunkPointer Pointer to BWA thunk. + * @param bases bases to align. + * @return The best alignment from BWA/C. + */ + protected native Alignment getBestAlignment(long thunkPointer, byte[] bases); }