From 38a030f2bac875655a75c5c8916c3a1f7c3d8bdf Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 20 Nov 2009 15:21:59 +0000 Subject: [PATCH] Finishing off data transfer conduits for single alignment generator. Misc bug fixes elsewhere. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2101 348d0f76-0448-11de-a6fe-93d51630548a --- c/bwa/bwa_gateway.cpp | 16 ++++++++++--- c/bwa/bwa_gateway.h | 8 +++++-- ...tute_sting_alignment_bwa_c_BWACAligner.cpp | 16 +++++++++---- .../sting/alignment/Alignment.java | 20 ++++++++++++++++ .../sting/alignment/AlignmentWalker.java | 24 +++++++++++++++---- .../sting/alignment/bwa/c/BWACAligner.java | 23 +++++++++++------- 6 files changed, 84 insertions(+), 23 deletions(-) diff --git a/c/bwa/bwa_gateway.cpp b/c/bwa/bwa_gateway.cpp index 66085b075..d524d9be9 100644 --- a/c/bwa/bwa_gateway.cpp +++ b/c/bwa/bwa_gateway.cpp @@ -56,12 +56,18 @@ void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& bwa_free_read_seq(1,sequence); } -Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_length) { +Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) { bwa_seq_t* sequence = create_sequence(bases,read_length); // Calculate paths. bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options); + // Check for no alignments found and return null. + if(sequence->n_aln == 0) { + bwa_free_read_seq(1,sequence); + return NULL; + } + // bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in. copy_bases_into_sequence(sequence,bases,read_length); @@ -69,7 +75,8 @@ Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_ bwa_aln2seq(sequence->n_aln,sequence->aln,sequence); // Generate the best alignment from the sequence. - Alignment alignment = generate_final_alignment_from_sequence(sequence); + Alignment* alignment = new Alignment; + *alignment = generate_final_alignment_from_sequence(sequence); bwa_free_read_seq(1,sequence); @@ -148,6 +155,7 @@ Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { Alignment alignment; // Populate basic path info + alignment.edit_distance = sequence->nm; alignment.num_mismatches = sequence->n_mm; alignment.num_gap_opens = sequence->n_gapo; alignment.num_gap_extensions = sequence->n_gape; @@ -168,7 +176,9 @@ Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) { memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t)); } alignment.n_cigar = sequence->n_cigar; - + + // MD tag with a better breakdown of differences in the cigar + alignment.md = strdup(sequence->md); delete[] sequence->md; sequence->md = NULL; diff --git a/c/bwa/bwa_gateway.h b/c/bwa/bwa_gateway.h index c74093f4c..0ef0a129b 100644 --- a/c/bwa/bwa_gateway.h +++ b/c/bwa/bwa_gateway.h @@ -21,8 +21,12 @@ class Alignment { uint8_t num_mismatches; uint8_t num_gap_opens; uint8_t num_gap_extensions; + uint16_t edit_distance; + uint32_t num_best; uint32_t num_second_best; + + char* md; }; class BWA { @@ -57,8 +61,8 @@ class BWA { void set_gap_extension_penalty(int penalty); // Perform the alignment - Alignment generate_single_alignment(const char* bases, - const unsigned read_length); + Alignment* generate_single_alignment(const char* bases, + const unsigned read_length); void find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp index 60a268dc8..f5d3e8fa2 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -252,8 +252,9 @@ JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAlig jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE); if(read_bases == NULL) return NULL; - Alignment best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); - jobject java_best_alignment = convert_to_java_alignment(env,read_bases,read_length,best_alignment); + Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length); + jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL; + delete best_alignment; env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE); @@ -291,12 +292,17 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c if(env->ExceptionCheck()) return NULL; } } + delete[] alignment.cigar; jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment"); if(java_alignment_class == NULL) return NULL; - jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IIIIII)V"); + jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "", "(IIZI[C[IILjava/lang/String;IIIII)V"); if(java_alignment_constructor == NULL) return NULL; + + jstring java_md = env->NewStringUTF(alignment.md); + if(java_md == NULL) return NULL; + delete[] alignment.md; jobject java_alignment = env->NewObject(java_alignment_class, java_alignment_constructor, @@ -306,6 +312,8 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c alignment.mapping_quality, java_cigar_operators, java_cigar_lengths, + alignment.edit_distance, + java_md, alignment.num_mismatches, alignment.num_gap_opens, alignment.num_gap_extensions, @@ -313,8 +321,6 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c alignment.num_second_best); if(java_alignment == NULL) return NULL; - delete[] alignment.cigar; - env->DeleteLocalRef(java_alignment_class); if(env->ExceptionCheck()) return NULL; diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index e484e046d..f91676706 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -19,6 +19,9 @@ public class Alignment { protected char[] cigarOperators; protected int[] cigarLengths; + protected int editDistance; + protected String mismatchingPositions; + protected int numMismatches; protected int numGapOpens; protected int numGapExtensions; @@ -49,6 +52,19 @@ public class Alignment { */ public int getMappingQuality() { return mappingQuality; } + /** + * Gets the edit distance; will eventually end up in the NM SAM tag + * if this alignment makes it that far. + * @return The edit distance. + */ + public int getEditDistance() { return editDistance; } + + /** + * A string representation of which positions mismatch; contents of MD tag. + * @return String representation of mismatching positions. + */ + public String getMismatchingPositions() { return mismatchingPositions; } + /** * Gets the number of mismatches in the read. * @return Number of mismatches. @@ -129,6 +145,8 @@ public class Alignment { int mappingQuality, char[] cigarOperators, int[] cigarLengths, + int editDistance, + String mismatchingPositions, int numMismatches, int numGapOpens, int numGapExtensions, @@ -140,6 +158,8 @@ public class Alignment { this.mappingQuality = mappingQuality; this.cigarOperators = cigarOperators; this.cigarLengths = cigarLengths; + this.editDistance = editDistance; + this.mismatchingPositions = mismatchingPositions; this.numMismatches = numMismatches; this.numGapOpens = numGapOpens; this.numGapExtensions = numGapExtensions; diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index e2c11d9bb..f86a40b4e 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -6,9 +6,8 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.*; +import net.sf.picard.reference.ReferenceSequenceFileFactory; /** * Align reads to the reference specified by BWTPrefix. @@ -37,6 +36,14 @@ public class AlignmentWalker extends ReadWalker { */ private SAMFileWriter outputBam = null; + /** Must return true for reads that need to be processed. Reads, for which this method return false will + * be skipped by the engine and never passed to the walker. + */ + @Override + public boolean filter(char[] ref, SAMRecord read) { + return true; + } + /** * Create an aligner object. The aligner object will load and hold the BWT until close() is called. */ @@ -45,8 +52,17 @@ public class AlignmentWalker extends ReadWalker { BWACConfiguration configuration = new BWACConfiguration(prefix); aligner = new BWACAligner(configuration); + // HACK: If the sequence dictionary in the existing header is null, stuff the contents of the current reference + // into it, so that the sequence has something to which to back-align. + SAMFileHeader header = getToolkit().getSAMFileHeader(); + if(header.getSequenceDictionary().isEmpty()) { + SAMSequenceDictionary referenceDictionary = + ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary(); + header.setSequenceDictionary(referenceDictionary); + } + if ( outputBamFile != null ) { - SAMFileHeader header = this.getToolkit().getSAMFileHeader(); + // Stuff the header from the fasta into that of the sequence dictionary. outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression); } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java index ded249e87..8da04b055 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -145,21 +145,26 @@ public class BWACAligner { /** * Creates a read directly from an alignment. * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. - * @param alignment The target alignment for this read. + * @param alignment The target alignment for this read. If alignment is null, assume the read is unmappe.d * @return A mapped alignment. */ public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) { SAMRecord read = null; try { read = (SAMRecord)unmappedRead.clone(); - read.setReadUmappedFlag(false); - read.setAlignmentStart((int)alignment.getAlignmentStart()); - read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); - read.setMappingQuality(alignment.getMappingQuality()); - read.setCigar(alignment.getCigar()); - if(alignment.isNegativeStrand()) { - read.setReadBases(BaseUtils.reverse(read.getReadBases())); - read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); + if(alignment != null) { + read.setReadUmappedFlag(false); + read.setReferenceIndex(alignment.getContigIndex()); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.reverse(read.getReadBases())); + read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); + } + read.setAttribute("NM",alignment.getEditDistance()); + read.setAttribute("MD",alignment.getMismatchingPositions()); } } catch(CloneNotSupportedException ex) {