From 4fbb6d05d0441e21490a5b0ec032bb8c2200955b Mon Sep 17 00:00:00 2001 From: hanna Date: Fri, 20 Nov 2009 17:08:09 +0000 Subject: [PATCH] Refactoring. Push the revisions to the common aligner interface down into the aligner base classes. Hack the managed implementation to support the new interface. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2103 348d0f76-0448-11de-a6fe-93d51630548a --- ...tute_sting_alignment_bwa_c_BWACAligner.cpp | 50 +++++----- ...titute_sting_alignment_bwa_c_BWACAligner.h | 12 ++- .../sting/alignment/Aligner.java | 33 ++++++- .../sting/alignment/Alignment.java | 51 +++++++++- .../alignment/AlignmentValidationWalker.java | 13 +-- .../sting/alignment/AlignmentWalker.java | 22 +++-- .../sting/alignment/bwa/BWAAligner.java | 31 ++++++ .../sting/alignment/bwa/BWAConfiguration.java | 46 +++++++++ .../sting/alignment/bwa/BWTFiles.java | 62 ++++++++++++ .../sting/alignment/bwa/c/BWACAligner.java | 94 +++++++++--------- .../alignment/bwa/c/BWACConfiguration.java | 98 ------------------- .../bwa/java/AlignerTestHarness.java | 47 +++++---- .../alignment/bwa/java/BWAAlignment.java | 6 +- .../{BWAAligner.java => BWAJavaAligner.java} | 41 +++++++- 14 files changed, 393 insertions(+), 213 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java create mode 100644 java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java create mode 100644 java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java delete mode 100644 java/src/org/broadinstitute/sting/alignment/bwa/c/BWACConfiguration.java rename java/src/org/broadinstitute/sting/alignment/bwa/java/{BWAAligner.java => BWAJavaAligner.java} (88%) diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp index f5d3e8fa2..580d9c74c 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -17,21 +17,21 @@ static void set_int_configuration_param(JNIEnv* env, jobject configuration, cons static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); -JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject configuration) +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration) { - jstring java_ann = get_configuration_string(env,configuration,"annFileName"); + jstring java_ann = get_configuration_string(env,bwtFiles,"annFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_amb = get_configuration_string(env,configuration,"ambFileName"); + jstring java_amb = get_configuration_string(env,bwtFiles,"ambFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_pac = get_configuration_string(env,configuration,"pacFileName"); + jstring java_pac = get_configuration_string(env,bwtFiles,"pacFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_forward_bwt = get_configuration_string(env,configuration,"forwardBWTFileName"); + jstring java_forward_bwt = get_configuration_string(env,bwtFiles,"forwardBWTFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_forward_sa = get_configuration_string(env,configuration,"forwardSAFileName"); + jstring java_forward_sa = get_configuration_string(env,bwtFiles,"forwardSAFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_reverse_bwt = get_configuration_string(env,configuration,"reverseBWTFileName"); + jstring java_reverse_bwt = get_configuration_string(env,bwtFiles,"reverseBWTFileName"); if(env->ExceptionCheck()) return 0L; - jstring java_reverse_sa = get_configuration_string(env,configuration,"reverseSAFileName"); + jstring java_reverse_sa = get_configuration_string(env,bwtFiles,"reverseSAFileName"); if(env->ExceptionCheck()) return 0L; const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE); @@ -57,20 +57,8 @@ JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligne reverse_bwt_filename, reverse_sa_filename); - set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); - if(env->ExceptionCheck()) return 0L; - set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); - if(env->ExceptionCheck()) return 0L; + Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration); + if(env->ExceptionCheck()) return 0L; env->ReleaseStringUTFChars(java_ann,ann_filename); if(env->ExceptionCheck()) return 0L; @@ -96,6 +84,24 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner delete bwa; } +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) { + BWA* bwa = (BWA*)java_bwa; + set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); + if(env->ExceptionCheck()) return; +} + JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) { BWA* bwa = (BWA*)java_bwa; diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h index 5e1b3ab2b..0c44e430a 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h @@ -10,10 +10,18 @@ extern "C" { /* * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner * Method: create - * Signature: (Lorg/broadinstitute/sting/alignment/bwa/c/BWACConfiguration;)J + * Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J */ JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create - (JNIEnv *, jobject, jobject); + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner + * Method: updateConfiguration + * Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration + (JNIEnv *, jobject, jlong, jobject); /* * Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner diff --git a/java/src/org/broadinstitute/sting/alignment/Aligner.java b/java/src/org/broadinstitute/sting/alignment/Aligner.java index 75649ae11..f57c75165 100644 --- a/java/src/org/broadinstitute/sting/alignment/Aligner.java +++ b/java/src/org/broadinstitute/sting/alignment/Aligner.java @@ -1,8 +1,10 @@ package org.broadinstitute.sting.alignment; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; import java.util.List; +import java.util.Iterator; /** * Create perfect alignments from the read to the genome represented by the given BWT / suffix array. @@ -11,11 +13,40 @@ import java.util.List; * @version 0.1 */ public interface Aligner { + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close(); + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases); + /** * Align the read to the reference. * @param read Read to align. + * @param header Optional header to drop in place. * @return A list of the alignments. */ - public List align(SAMRecord read); + public SAMRecord align(final SAMRecord read, final SAMFileHeader header); + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterator getAllAlignments(final byte[] bases); + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterator alignAll(final SAMRecord read, final SAMFileHeader newHeader); } + diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index f91676706..5f5b53070 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.alignment; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.CigarElement; +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.BaseUtils; /** * Represents an alignment of a read to a site in the reference genome. @@ -138,6 +138,13 @@ public class Alignment { * @param mappingQuality How good does BWA think this mapping is? * @param cigarOperators The ordered operators in the cigar string. * @param cigarLengths The lengths to which each operator applies. + * @param editDistance The edit distance (cumulative) of the read. + * @param mismatchingPositions String representation of which bases in the read mismatch. + * @param numMismatches Number of total mismatches in the read. + * @param numGapOpens Number of gap opens in the read. + * @param numGapExtensions Number of gap extensions in the read. + * @param bestCount Number of best alignments in the read. + * @param secondBestCount Number of second best alignments in the read. */ public Alignment(int contigIndex, int alignmentStart, @@ -166,4 +173,42 @@ public class Alignment { this.bestCount = bestCount; this.secondBestCount = secondBestCount; } + + /** + * Creates a read directly from an alignment. + * @param alignment The alignment to convert to a read. + * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. + * @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence + * dictionary in the + * @return A mapped alignment. + */ + public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) { + SAMRecord read; + try { + read = (SAMRecord)unmappedRead.clone(); + } + catch(CloneNotSupportedException ex) { + throw new StingException("Unable to create aligned read from template."); + } + + if(newSAMHeader != null) + read.setHeader(newSAMHeader); + + if(alignment != null) { + read.setReadUmappedFlag(false); + read.setReferenceIndex(alignment.getContigIndex()); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.reverse(read.getReadBases())); + read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); + } + read.setAttribute("NM",alignment.getEditDistance()); + read.setAttribute("MD",alignment.getMismatchingPositions()); + } + + return read; + } } diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java index 10dfca0ec..011836ed2 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -5,15 +5,11 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; -import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; -import org.broadinstitute.sting.alignment.bwa.c.BWAPath; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; import net.sf.samtools.SAMRecord; import java.util.Iterator; -import java.util.SortedMap; -import java.util.TreeMap; -import java.io.PrintWriter; -import java.io.FileNotFoundException; /** * Validates alignments against existing reads. @@ -43,8 +39,9 @@ public class AlignmentValidationWalker extends ReadWalker { */ @Override public void initialize() { - BWACConfiguration configuration = new BWACConfiguration(prefix); - aligner = new BWACAligner(configuration); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); } /** Must return true for reads that need to be processed. Reads, for which this method return false will diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index f86a40b4e..632ec97a1 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -5,7 +5,8 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; -import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; import net.sf.samtools.*; import net.sf.picard.reference.ReferenceSequenceFileFactory; @@ -36,6 +37,11 @@ public class AlignmentWalker extends ReadWalker { */ private SAMFileWriter outputBam = null; + /** + * New header to use, if desired. + */ + private SAMFileHeader header = null; + /** Must return true for reads that need to be processed. Reads, for which this method return false will * be skipped by the engine and never passed to the walker. */ @@ -49,17 +55,21 @@ public class AlignmentWalker extends ReadWalker { */ @Override public void initialize() { - BWACConfiguration configuration = new BWACConfiguration(prefix); - aligner = new BWACAligner(configuration); + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); // HACK: If the sequence dictionary in the existing header is null, stuff the contents of the current reference // into it, so that the sequence has something to which to back-align. - SAMFileHeader header = getToolkit().getSAMFileHeader(); - if(header.getSequenceDictionary().isEmpty()) { + SAMFileHeader originalHeader = getToolkit().getSAMFileHeader(); + if(originalHeader.getSequenceDictionary().isEmpty()) { + header = originalHeader.clone(); SAMSequenceDictionary referenceDictionary = ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary(); header.setSequenceDictionary(referenceDictionary); } + else + header = originalHeader; if ( outputBamFile != null ) { // Stuff the header from the fasta into that of the sequence dictionary. @@ -75,7 +85,7 @@ public class AlignmentWalker extends ReadWalker { */ @Override public Integer map(char[] ref, SAMRecord read) { - SAMRecord alignedRead = aligner.align(read); + SAMRecord alignedRead = aligner.align(read,header); if (outputBam != null) { outputBam.addAlignment(alignedRead); } else { diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java new file mode 100644 index 000000000..75cde973e --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.alignment.Aligner; +import org.broadinstitute.sting.alignment.Alignment; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; + +import java.util.Iterator; + +/** + * Align reads using BWA. + * + * @author mhanna + * @version 0.1 + */ +public abstract class BWAAligner implements Aligner { + + /** + * Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct + * parameters. + * @param bwtFiles The many files representing BWTs persisted to disk. + * @param configuration Configuration parameters for the alignment. + */ + public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { } + + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + public abstract void updateConfiguration(BWAConfiguration configuration); +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java new file mode 100644 index 000000000..fb8000aa2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.utils.StingException; + +/** + * Configuration for the BWA/C aligner. + * + * @author mhanna + * @version 0.1 + */ +public class BWAConfiguration { + /** + * The maximum edit distance used by BWA. + */ + public Float maximumEditDistance = null; + + /** + * How many gap opens are acceptable within this alignment? + */ + public Integer maximumGapOpens = null; + + /** + * How many gap extensions are acceptable within this alignment? + */ + public Integer maximumGapExtensions = null; + + /** + * Do we disallow indels within a certain range from the start / end? + */ + public Integer disallowIndelWithinRange = null; + + /** + * What is the scoring penalty for a mismatch? + */ + public Integer mismatchPenalty = null; + + /** + * What is the scoring penalty for a gap open? + */ + public Integer gapOpenPenalty = null; + + /** + * What is the scoring penalty for a gap extension? + */ + public Integer gapExtensionPenalty = null; +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java new file mode 100644 index 000000000..b745528b6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -0,0 +1,62 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.utils.StingException; + +/** + * Support files for BWT. + * + * @author mhanna + * @version 0.1 + */ +public class BWTFiles { + /** + * ANN (?) file name. + */ + public final String annFileName; + + /** + * AMB (?) file name. + */ + public final String ambFileName; + + /** + * Packed reference sequence file. + */ + public final String pacFileName; + + /** + * Forward BWT file. + */ + public final String forwardBWTFileName; + + /** + * Forward suffix array file. + */ + public final String forwardSAFileName; + + /** + * Reverse BWT file. + */ + public final String reverseBWTFileName; + + /** + * Reverse suffix array file. + */ + public final String reverseSAFileName; + + /** + * Create a new BWA configuration file using the given prefix. + * @param prefix Prefix to use when creating the configuration. Must not be null. + */ + public BWTFiles(String prefix) { + if(prefix == null) + throw new StingException("Prefix must not be null."); + annFileName = prefix + ".ann"; + ambFileName = prefix + ".amb"; + pacFileName = prefix + ".pac"; + forwardBWTFileName = prefix + ".bwt"; + forwardSAFileName = prefix + ".sa"; + reverseBWTFileName = prefix + ".rbwt"; + reverseSAFileName = prefix + ".rsa"; + } +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java index 8da04b055..f85fafcbc 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -1,9 +1,13 @@ package org.broadinstitute.sting.alignment.bwa.c; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.BWAAligner; import java.util.*; import java.io.File; @@ -14,7 +18,7 @@ import java.io.File; * @author mhanna * @version 0.1 */ -public class BWACAligner { +public class BWACAligner extends BWAAligner { static { System.loadLibrary("bwa"); } @@ -24,25 +28,37 @@ public class BWACAligner { */ private long thunkPointer = 0; - public BWACAligner(BWACConfiguration configuration) { + public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { + super(bwtFiles,configuration); if(thunkPointer != 0) throw new StingException("BWA/C attempting to reinitialize."); - if(!new File(configuration.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); - if(!new File(configuration.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it."); + if(!new File(bwtFiles.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it."); - thunkPointer = create(configuration); + thunkPointer = create(bwtFiles,configuration); } + /** + * Update the configuration passed to the BWA aligner. + * @param configuration New configuration to set. + */ + @Override + public void updateConfiguration(BWAConfiguration configuration) { + if(thunkPointer != 0) + throw new StingException("BWA/C: attempting to update configuration of uninitialized aligner."); + updateConfiguration(thunkPointer,configuration); + } /** * Close this instance of the BWA pointer and delete its resources. */ + @Override public void close() { if(thunkPointer == 0) throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized."); @@ -54,6 +70,7 @@ public class BWACAligner { * @param bases Bases to align. * @return An align */ + @Override public Alignment getBestAlignment(final byte[] bases) { if(thunkPointer == 0) throw new StingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized."); @@ -63,10 +80,12 @@ public class BWACAligner { /** * Get the best aligned read, chosen randomly from the pile of best alignments. * @param read Read to align. + * @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid. * @return Read with injected alignment data. */ - public SAMRecord align(final SAMRecord read) { - return convertAlignmentToRead(read,getBestAlignment(read.getReadBases())); + @Override + public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { + return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); } /** @@ -74,6 +93,7 @@ public class BWACAligner { * @param bases List of bases. * @return Iterator to alignments. */ + @Override public Iterator getAllAlignments(final byte[] bases) { final BWAPath[] paths = getPaths(bases); return new Iterator() { @@ -111,9 +131,11 @@ public class BWACAligner { /** * Get a iterator of aligned reads, batched by mapping quality. * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. * @return Iterator to alignments. */ - public Iterator alignAll(final SAMRecord read) { + @Override + public Iterator alignAll(final SAMRecord read, final SAMFileHeader newHeader) { final Iterator alignments = getAllAlignments(read.getReadBases()); return new Iterator() { /** @@ -130,7 +152,7 @@ public class BWACAligner { Alignment[] alignmentsOfQuality = alignments.next(); SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; for(int i = 0; i < alignmentsOfQuality.length; i++) { - reads[i] = convertAlignmentToRead(read,alignmentsOfQuality[i]); + reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader); } return reads; } @@ -142,37 +164,6 @@ public class BWACAligner { }; } - /** - * Creates a read directly from an alignment. - * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. - * @param alignment The target alignment for this read. If alignment is null, assume the read is unmappe.d - * @return A mapped alignment. - */ - public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) { - SAMRecord read = null; - try { - read = (SAMRecord)unmappedRead.clone(); - if(alignment != null) { - read.setReadUmappedFlag(false); - read.setReferenceIndex(alignment.getContigIndex()); - read.setAlignmentStart((int)alignment.getAlignmentStart()); - read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); - read.setMappingQuality(alignment.getMappingQuality()); - read.setCigar(alignment.getCigar()); - if(alignment.isNegativeStrand()) { - read.setReadBases(BaseUtils.reverse(read.getReadBases())); - read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); - } - read.setAttribute("NM",alignment.getEditDistance()); - read.setAttribute("MD",alignment.getMismatchingPositions()); - } - } - catch(CloneNotSupportedException ex) { - throw new StingException("Unable to create aligned read from template."); - } - return read; - } - /** * Get the paths associated with the given base string. * @param bases List of bases. @@ -181,16 +172,23 @@ public class BWACAligner { public BWAPath[] getPaths(byte[] bases) { if(thunkPointer == 0) throw new StingException("BWA/C getPaths attempted, but BWA/C is not properly initialized."); - BWAPath[] paths = getPaths(thunkPointer,bases); - return paths; + return getPaths(thunkPointer,bases); } /** * Create a pointer to the BWA/C thunk. + * @param files BWT source files. * @param configuration Configuration of the aligner. * @return Pointer to the BWA/C thunk. */ - protected native long create(BWACConfiguration configuration); + protected native long create(BWTFiles files, BWAConfiguration configuration); + + /** + * Update the configuration passed to the BWA aligner. For internal use only. + * @param thunkPointer pointer to BWA object. + * @param configuration New configuration to set. + */ + protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration); /** * Destroy the BWA/C thunk. diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACConfiguration.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACConfiguration.java deleted file mode 100644 index 96eee7b64..000000000 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACConfiguration.java +++ /dev/null @@ -1,98 +0,0 @@ -package org.broadinstitute.sting.alignment.bwa.c; - -import org.broadinstitute.sting.utils.StingException; - -/** - * Configuration for the BWA/C aligner. - * - * @author mhanna - * @version 0.1 - */ -public class BWACConfiguration { - /** - * ANN (?) file name. - */ - public String annFileName = null; - - /** - * AMB (?) file name. - */ - public String ambFileName = null; - - /** - * Packed reference sequence file. - */ - public String pacFileName = null; - - /** - * Forward BWT file. - */ - public String forwardBWTFileName = null; - - /** - * Forward suffix array file. - */ - public String forwardSAFileName = null; - - /** - * Reverse BWT file. - */ - public String reverseBWTFileName = null; - - /** - * Reverse suffix array file. - */ - public String reverseSAFileName = null; - - /** - * The maximum edit distance used by BWA. - */ - public Float maximumEditDistance = null; - - /** - * How many gap opens are acceptable within this alignment? - */ - public Integer maximumGapOpens = null; - - /** - * How many gap extensions are acceptable within this alignment? - */ - public Integer maximumGapExtensions = null; - - /** - * Do we disallow indels within a certain range from the start / end? - */ - public Integer disallowIndelWithinRange = null; - - /** - * What is the scoring penalty for a mismatch? - */ - public Integer mismatchPenalty = null; - - /** - * What is the scoring penalty for a gap open? - */ - public Integer gapOpenPenalty = null; - - /** - * What is the scoring penalty for a gap extension? - */ - public Integer gapExtensionPenalty = null; - - /** - * Create a new BWA configuration file using the given prefix. - * @param prefix Prefix to use when creating the configuration. Must not be null. - */ - public BWACConfiguration(String prefix) { - if(prefix == null) - throw new StingException("Prefix must not be null."); - annFileName = prefix + ".ann"; - ambFileName = prefix + ".amb"; - pacFileName = prefix + ".pac"; - forwardBWTFileName = prefix + ".bwt"; - forwardSAFileName = prefix + ".sa"; - reverseBWTFileName = prefix + ".rbwt"; - reverseSAFileName = prefix + ".rsa"; - } - -} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java index c3499ca8c..48096fb19 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.alignment.bwa.java; import org.broadinstitute.sting.alignment.Aligner; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.sting.alignment.bwa.java.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.java.BWAJavaAligner; import org.broadinstitute.sting.alignment.bwa.java.BWAAlignment; import org.broadinstitute.sting.alignment.bwa.java.AlignmentState; import org.broadinstitute.sting.utils.StingException; @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.io.File; import java.io.FileNotFoundException; -import java.util.List; +import java.util.Iterator; import net.sf.samtools.*; @@ -39,7 +39,7 @@ public class AlignerTestHarness { } private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException { - Aligner aligner = new BWAAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); + Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile); int count = 0; SAMFileReader reader = new SAMFileReader(bamFile); @@ -79,21 +79,24 @@ public class AlignerTestHarness { // Clear everything except flags pertaining to pairing and set 'unmapped' status to true. alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C); - List alignments = aligner.align(alignmentCleaned); - if(alignments.size() == 0 ) { + Iterator alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases()); + if(!alignments.hasNext() ) { //throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count); failures++; } Alignment foundAlignment = null; - for( Alignment alignment: alignments ) { - if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) - continue; - if( read.getAlignmentStart() != alignment.getAlignmentStart() ) - continue; + while(alignments.hasNext()) { + Alignment[] alignmentsOfQuality = alignments.next(); + for(Alignment alignment: alignmentsOfQuality) { + if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() ) + continue; + if( read.getAlignmentStart() != alignment.getAlignmentStart() ) + continue; - foundAlignment = alignment; + foundAlignment = alignment; + } } if( foundAlignment != null ) { @@ -113,18 +116,22 @@ public class AlignerTestHarness { String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases()); System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION)); - for( Alignment alignment: alignments ) { - System.out.println(); + Iterator alignmentsToOutput = aligner.getAllAlignments(alignmentCleaned.getReadBases()); + while(alignmentsToOutput.hasNext()) { + Alignment[] alignmentsOfQuality = alignments.next(); + for(Alignment alignment: alignmentsOfQuality) { + System.out.println(); - Cigar cigar = ((BWAAlignment)alignment).getCigar(); + Cigar cigar = ((BWAAlignment)alignment).getCigar(); - System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION)); + System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION)); - int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION); - String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases()); - System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION), - alignment.getAlignmentStart(), - alignment.isNegativeStrand()); + int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION); + String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases()); + System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION), + alignment.getAlignmentStart(), + alignment.isNegativeStrand()); + } } //throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count)); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java index cc912dd82..7cbc362a0 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAlignment.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment.bwa.java; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.sting.alignment.bwa.java.BWAAligner; +import org.broadinstitute.sting.alignment.bwa.java.BWAJavaAligner; import org.broadinstitute.sting.alignment.bwa.java.AlignmentMatchSequence; import org.broadinstitute.sting.alignment.bwa.java.AlignmentState; import org.broadinstitute.sting.utils.StingException; @@ -28,7 +28,7 @@ public class BWAAlignment extends Alignment implements Cloneable { /** * The aligner performing the alignments. */ - protected BWAAligner aligner; + protected BWAJavaAligner aligner; /** * The sequence of matches/mismatches/insertions/deletions. @@ -136,7 +136,7 @@ public class BWAAlignment extends Alignment implements Cloneable { * Create a new alignment with the given parent aligner. * @param aligner Aligner being used. */ - public BWAAlignment( BWAAligner aligner ) { + public BWAAlignment( BWAJavaAligner aligner ) { this.aligner = aligner; this.creationNumber = numCreated++; } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java similarity index 88% rename from java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAligner.java rename to java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java index c5c43ceca..8ce9eff64 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -12,6 +12,7 @@ import java.io.File; import java.util.*; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileHeader; /** * Create imperfect alignments from the read to the genome represented by the given BWT / suffix array. @@ -19,7 +20,7 @@ import net.sf.samtools.SAMRecord; * @author mhanna * @version 0.1 */ -public class BWAAligner implements Aligner { +public class BWAJavaAligner implements Aligner { /** * BWT in the forward direction. */ @@ -75,13 +76,49 @@ public class BWAAligner implements Aligner { */ public final int INDEL_END_SKIP = 5; - public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { + public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) { forwardBWT = new BWTReader(forwardBWTFile).read(); reverseBWT = new BWTReader(reverseBWTFile).read(); forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read(); reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read(); } + /** + * Close this instance of the BWA pointer and delete its resources. + */ + public void close() { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Allow the aligner to choose one alignment randomly from the pile of best alignments. + * @param bases Bases to align. + * @return An align + */ + public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Align the read to the reference. + * @param read Read to align. + * @param header Optional header to drop in place. + * @return A list of the alignments. + */ + public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterator getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @param newHeader Optional new header to use when aligning the read. If present, it must be null. + * @return Iterator to alignments. + */ + public Iterator alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); } + + public List align( SAMRecord read ) { List successfulMatches = new ArrayList();