Refactoring. Push the revisions to the common aligner interface down into

the aligner base classes.  Hack the managed implementation to support the
new interface.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2103 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-20 17:08:09 +00:00
parent d84444200b
commit 4fbb6d05d0
14 changed files with 393 additions and 213 deletions

View File

@ -17,21 +17,21 @@ static void set_int_configuration_param(JNIEnv* env, jobject configuration, cons
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject configuration)
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
{
jstring java_ann = get_configuration_string(env,configuration,"annFileName");
jstring java_ann = get_configuration_string(env,bwtFiles,"annFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_amb = get_configuration_string(env,configuration,"ambFileName");
jstring java_amb = get_configuration_string(env,bwtFiles,"ambFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_pac = get_configuration_string(env,configuration,"pacFileName");
jstring java_pac = get_configuration_string(env,bwtFiles,"pacFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_forward_bwt = get_configuration_string(env,configuration,"forwardBWTFileName");
jstring java_forward_bwt = get_configuration_string(env,bwtFiles,"forwardBWTFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_forward_sa = get_configuration_string(env,configuration,"forwardSAFileName");
jstring java_forward_sa = get_configuration_string(env,bwtFiles,"forwardSAFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_reverse_bwt = get_configuration_string(env,configuration,"reverseBWTFileName");
jstring java_reverse_bwt = get_configuration_string(env,bwtFiles,"reverseBWTFileName");
if(env->ExceptionCheck()) return 0L;
jstring java_reverse_sa = get_configuration_string(env,configuration,"reverseSAFileName");
jstring java_reverse_sa = get_configuration_string(env,bwtFiles,"reverseSAFileName");
if(env->ExceptionCheck()) return 0L;
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
@ -57,20 +57,8 @@ JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligne
reverse_bwt_filename,
reverse_sa_filename);
set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty);
if(env->ExceptionCheck()) return 0L;
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
if(env->ExceptionCheck()) return 0L;
Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(env,instance,(jlong)bwa,configuration);
if(env->ExceptionCheck()) return 0L;
env->ReleaseStringUTFChars(java_ann,ann_filename);
if(env->ExceptionCheck()) return 0L;
@ -96,6 +84,24 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
delete bwa;
}
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration(JNIEnv *env, jobject instance, jlong java_bwa, jobject configuration) {
BWA* bwa = (BWA*)java_bwa;
set_float_configuration_param(env, configuration, "maximumEditDistance", bwa, &BWA::set_max_edit_distance);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "maximumGapOpens", bwa, &BWA::set_max_gap_opens);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "maximumGapExtensions", bwa, &BWA::set_max_gap_extensions);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "disallowIndelWithinRange", bwa, &BWA::set_disallow_indel_within_range);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "mismatchPenalty", bwa, &BWA::set_mismatch_penalty);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "gapOpenPenalty", bwa, &BWA::set_gap_open_penalty);
if(env->ExceptionCheck()) return;
set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
if(env->ExceptionCheck()) return;
}
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
{
BWA* bwa = (BWA*)java_bwa;

View File

@ -10,10 +10,18 @@ extern "C" {
/*
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
* Method: create
* Signature: (Lorg/broadinstitute/sting/alignment/bwa/c/BWACConfiguration;)J
* Signature: (Lorg/broadinstitute/sting/alignment/bwa/BWTFiles;Lorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)J
*/
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create
(JNIEnv *, jobject, jobject);
(JNIEnv *, jobject, jobject, jobject);
/*
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
* Method: updateConfiguration
* Signature: (JLorg/broadinstitute/sting/alignment/bwa/BWAConfiguration;)V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_updateConfiguration
(JNIEnv *, jobject, jlong, jobject);
/*
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import java.util.List;
import java.util.Iterator;
/**
* Create perfect alignments from the read to the genome represented by the given BWT / suffix array.
@ -11,11 +13,40 @@ import java.util.List;
* @version 0.1
*/
public interface Aligner {
/**
* Close this instance of the BWA pointer and delete its resources.
*/
public void close();
/**
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
* @param bases Bases to align.
* @return An align
*/
public Alignment getBestAlignment(final byte[] bases);
/**
* Align the read to the reference.
* @param read Read to align.
* @param header Optional header to drop in place.
* @return A list of the alignments.
*/
public List<Alignment> align(SAMRecord read);
public SAMRecord align(final SAMRecord read, final SAMFileHeader header);
/**
* Get a iterator of alignments, batched by mapping quality.
* @param bases List of bases.
* @return Iterator to alignments.
*/
public Iterator<Alignment[]> getAllAlignments(final byte[] bases);
/**
* Get a iterator of aligned reads, batched by mapping quality.
* @param read Read to align.
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
* @return Iterator to alignments.
*/
public Iterator<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader);
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.CigarElement;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
/**
* Represents an alignment of a read to a site in the reference genome.
@ -138,6 +138,13 @@ public class Alignment {
* @param mappingQuality How good does BWA think this mapping is?
* @param cigarOperators The ordered operators in the cigar string.
* @param cigarLengths The lengths to which each operator applies.
* @param editDistance The edit distance (cumulative) of the read.
* @param mismatchingPositions String representation of which bases in the read mismatch.
* @param numMismatches Number of total mismatches in the read.
* @param numGapOpens Number of gap opens in the read.
* @param numGapExtensions Number of gap extensions in the read.
* @param bestCount Number of best alignments in the read.
* @param secondBestCount Number of second best alignments in the read.
*/
public Alignment(int contigIndex,
int alignmentStart,
@ -166,4 +173,42 @@ public class Alignment {
this.bestCount = bestCount;
this.secondBestCount = secondBestCount;
}
/**
* Creates a read directly from an alignment.
* @param alignment The alignment to convert to a read.
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
* @param newSAMHeader The new SAM header to use in creating this read. Can be null, but if so, the sequence
* dictionary in the
* @return A mapped alignment.
*/
public static SAMRecord convertToRead(Alignment alignment, SAMRecord unmappedRead, SAMFileHeader newSAMHeader) {
SAMRecord read;
try {
read = (SAMRecord)unmappedRead.clone();
}
catch(CloneNotSupportedException ex) {
throw new StingException("Unable to create aligned read from template.");
}
if(newSAMHeader != null)
read.setHeader(newSAMHeader);
if(alignment != null) {
read.setReadUmappedFlag(false);
read.setReferenceIndex(alignment.getContigIndex());
read.setAlignmentStart((int)alignment.getAlignmentStart());
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
read.setMappingQuality(alignment.getMappingQuality());
read.setCigar(alignment.getCigar());
if(alignment.isNegativeStrand()) {
read.setReadBases(BaseUtils.reverse(read.getReadBases()));
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
}
read.setAttribute("NM",alignment.getEditDistance());
read.setAttribute("MD",alignment.getMismatchingPositions());
}
return read;
}
}

View File

@ -5,15 +5,11 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import org.broadinstitute.sting.alignment.bwa.c.BWAPath;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.TreeMap;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
/**
* Validates alignments against existing reads.
@ -43,8 +39,9 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
*/
@Override
public void initialize() {
BWACConfiguration configuration = new BWACConfiguration(prefix);
aligner = new BWACAligner(configuration);
BWTFiles bwtFiles = new BWTFiles(prefix);
BWAConfiguration configuration = new BWAConfiguration();
aligner = new BWACAligner(bwtFiles,configuration);
}
/** Must return true for reads that need to be processed. Reads, for which this method return false will

View File

@ -5,7 +5,8 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import net.sf.samtools.*;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
@ -36,6 +37,11 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
*/
private SAMFileWriter outputBam = null;
/**
* New header to use, if desired.
*/
private SAMFileHeader header = null;
/** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker.
*/
@ -49,17 +55,21 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
*/
@Override
public void initialize() {
BWACConfiguration configuration = new BWACConfiguration(prefix);
aligner = new BWACAligner(configuration);
BWTFiles bwtFiles = new BWTFiles(prefix);
BWAConfiguration configuration = new BWAConfiguration();
aligner = new BWACAligner(bwtFiles,configuration);
// HACK: If the sequence dictionary in the existing header is null, stuff the contents of the current reference
// into it, so that the sequence has something to which to back-align.
SAMFileHeader header = getToolkit().getSAMFileHeader();
if(header.getSequenceDictionary().isEmpty()) {
SAMFileHeader originalHeader = getToolkit().getSAMFileHeader();
if(originalHeader.getSequenceDictionary().isEmpty()) {
header = originalHeader.clone();
SAMSequenceDictionary referenceDictionary =
ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary();
header.setSequenceDictionary(referenceDictionary);
}
else
header = originalHeader;
if ( outputBamFile != null ) {
// Stuff the header from the fasta into that of the sequence dictionary.
@ -75,7 +85,7 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
SAMRecord alignedRead = aligner.align(read);
SAMRecord alignedRead = aligner.align(read,header);
if (outputBam != null) {
outputBam.addAlignment(alignedRead);
} else {

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.alignment.Aligner;
import org.broadinstitute.sting.alignment.Alignment;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import java.util.Iterator;
/**
* Align reads using BWA.
*
* @author mhanna
* @version 0.1
*/
public abstract class BWAAligner implements Aligner {
/**
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
* parameters.
* @param bwtFiles The many files representing BWTs persisted to disk.
* @param configuration Configuration parameters for the alignment.
*/
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { }
/**
* Update the configuration passed to the BWA aligner.
* @param configuration New configuration to set.
*/
public abstract void updateConfiguration(BWAConfiguration configuration);
}

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.utils.StingException;
/**
* Configuration for the BWA/C aligner.
*
* @author mhanna
* @version 0.1
*/
public class BWAConfiguration {
/**
* The maximum edit distance used by BWA.
*/
public Float maximumEditDistance = null;
/**
* How many gap opens are acceptable within this alignment?
*/
public Integer maximumGapOpens = null;
/**
* How many gap extensions are acceptable within this alignment?
*/
public Integer maximumGapExtensions = null;
/**
* Do we disallow indels within a certain range from the start / end?
*/
public Integer disallowIndelWithinRange = null;
/**
* What is the scoring penalty for a mismatch?
*/
public Integer mismatchPenalty = null;
/**
* What is the scoring penalty for a gap open?
*/
public Integer gapOpenPenalty = null;
/**
* What is the scoring penalty for a gap extension?
*/
public Integer gapExtensionPenalty = null;
}

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.utils.StingException;
/**
* Support files for BWT.
*
* @author mhanna
* @version 0.1
*/
public class BWTFiles {
/**
* ANN (?) file name.
*/
public final String annFileName;
/**
* AMB (?) file name.
*/
public final String ambFileName;
/**
* Packed reference sequence file.
*/
public final String pacFileName;
/**
* Forward BWT file.
*/
public final String forwardBWTFileName;
/**
* Forward suffix array file.
*/
public final String forwardSAFileName;
/**
* Reverse BWT file.
*/
public final String reverseBWTFileName;
/**
* Reverse suffix array file.
*/
public final String reverseSAFileName;
/**
* Create a new BWA configuration file using the given prefix.
* @param prefix Prefix to use when creating the configuration. Must not be null.
*/
public BWTFiles(String prefix) {
if(prefix == null)
throw new StingException("Prefix must not be null.");
annFileName = prefix + ".ann";
ambFileName = prefix + ".amb";
pacFileName = prefix + ".pac";
forwardBWTFileName = prefix + ".bwt";
forwardSAFileName = prefix + ".sa";
reverseBWTFileName = prefix + ".rbwt";
reverseSAFileName = prefix + ".rsa";
}
}

View File

@ -1,9 +1,13 @@
package org.broadinstitute.sting.alignment.bwa.c;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
import java.util.*;
import java.io.File;
@ -14,7 +18,7 @@ import java.io.File;
* @author mhanna
* @version 0.1
*/
public class BWACAligner {
public class BWACAligner extends BWAAligner {
static {
System.loadLibrary("bwa");
}
@ -24,25 +28,37 @@ public class BWACAligner {
*/
private long thunkPointer = 0;
public BWACAligner(BWACConfiguration configuration) {
public BWACAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
super(bwtFiles,configuration);
if(thunkPointer != 0)
throw new StingException("BWA/C attempting to reinitialize.");
if(!new File(configuration.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(configuration.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
if(!new File(bwtFiles.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
thunkPointer = create(configuration);
thunkPointer = create(bwtFiles,configuration);
}
/**
* Update the configuration passed to the BWA aligner.
* @param configuration New configuration to set.
*/
@Override
public void updateConfiguration(BWAConfiguration configuration) {
if(thunkPointer != 0)
throw new StingException("BWA/C: attempting to update configuration of uninitialized aligner.");
updateConfiguration(thunkPointer,configuration);
}
/**
* Close this instance of the BWA pointer and delete its resources.
*/
@Override
public void close() {
if(thunkPointer == 0)
throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized.");
@ -54,6 +70,7 @@ public class BWACAligner {
* @param bases Bases to align.
* @return An align
*/
@Override
public Alignment getBestAlignment(final byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C getBestAlignment attempted, but BWA/C is not properly initialized.");
@ -63,10 +80,12 @@ public class BWACAligner {
/**
* Get the best aligned read, chosen randomly from the pile of best alignments.
* @param read Read to align.
* @param newHeader New header to apply to this SAM file. Can be null, but if so, read header must be valid.
* @return Read with injected alignment data.
*/
public SAMRecord align(final SAMRecord read) {
return convertAlignmentToRead(read,getBestAlignment(read.getReadBases()));
@Override
public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) {
return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader);
}
/**
@ -74,6 +93,7 @@ public class BWACAligner {
* @param bases List of bases.
* @return Iterator to alignments.
*/
@Override
public Iterator<Alignment[]> getAllAlignments(final byte[] bases) {
final BWAPath[] paths = getPaths(bases);
return new Iterator<Alignment[]>() {
@ -111,9 +131,11 @@ public class BWACAligner {
/**
* Get a iterator of aligned reads, batched by mapping quality.
* @param read Read to align.
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
* @return Iterator to alignments.
*/
public Iterator<SAMRecord[]> alignAll(final SAMRecord read) {
@Override
public Iterator<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) {
final Iterator<Alignment[]> alignments = getAllAlignments(read.getReadBases());
return new Iterator<SAMRecord[]>() {
/**
@ -130,7 +152,7 @@ public class BWACAligner {
Alignment[] alignmentsOfQuality = alignments.next();
SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length];
for(int i = 0; i < alignmentsOfQuality.length; i++) {
reads[i] = convertAlignmentToRead(read,alignmentsOfQuality[i]);
reads[i] = Alignment.convertToRead(alignmentsOfQuality[i],read,newHeader);
}
return reads;
}
@ -142,37 +164,6 @@ public class BWACAligner {
};
}
/**
* Creates a read directly from an alignment.
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
* @param alignment The target alignment for this read. If alignment is null, assume the read is unmappe.d
* @return A mapped alignment.
*/
public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) {
SAMRecord read = null;
try {
read = (SAMRecord)unmappedRead.clone();
if(alignment != null) {
read.setReadUmappedFlag(false);
read.setReferenceIndex(alignment.getContigIndex());
read.setAlignmentStart((int)alignment.getAlignmentStart());
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
read.setMappingQuality(alignment.getMappingQuality());
read.setCigar(alignment.getCigar());
if(alignment.isNegativeStrand()) {
read.setReadBases(BaseUtils.reverse(read.getReadBases()));
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
}
read.setAttribute("NM",alignment.getEditDistance());
read.setAttribute("MD",alignment.getMismatchingPositions());
}
}
catch(CloneNotSupportedException ex) {
throw new StingException("Unable to create aligned read from template.");
}
return read;
}
/**
* Get the paths associated with the given base string.
* @param bases List of bases.
@ -181,16 +172,23 @@ public class BWACAligner {
public BWAPath[] getPaths(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
BWAPath[] paths = getPaths(thunkPointer,bases);
return paths;
return getPaths(thunkPointer,bases);
}
/**
* Create a pointer to the BWA/C thunk.
* @param files BWT source files.
* @param configuration Configuration of the aligner.
* @return Pointer to the BWA/C thunk.
*/
protected native long create(BWACConfiguration configuration);
protected native long create(BWTFiles files, BWAConfiguration configuration);
/**
* Update the configuration passed to the BWA aligner. For internal use only.
* @param thunkPointer pointer to BWA object.
* @param configuration New configuration to set.
*/
protected native void updateConfiguration(long thunkPointer, BWAConfiguration configuration);
/**
* Destroy the BWA/C thunk.

View File

@ -1,98 +0,0 @@
package org.broadinstitute.sting.alignment.bwa.c;
import org.broadinstitute.sting.utils.StingException;
/**
* Configuration for the BWA/C aligner.
*
* @author mhanna
* @version 0.1
*/
public class BWACConfiguration {
/**
* ANN (?) file name.
*/
public String annFileName = null;
/**
* AMB (?) file name.
*/
public String ambFileName = null;
/**
* Packed reference sequence file.
*/
public String pacFileName = null;
/**
* Forward BWT file.
*/
public String forwardBWTFileName = null;
/**
* Forward suffix array file.
*/
public String forwardSAFileName = null;
/**
* Reverse BWT file.
*/
public String reverseBWTFileName = null;
/**
* Reverse suffix array file.
*/
public String reverseSAFileName = null;
/**
* The maximum edit distance used by BWA.
*/
public Float maximumEditDistance = null;
/**
* How many gap opens are acceptable within this alignment?
*/
public Integer maximumGapOpens = null;
/**
* How many gap extensions are acceptable within this alignment?
*/
public Integer maximumGapExtensions = null;
/**
* Do we disallow indels within a certain range from the start / end?
*/
public Integer disallowIndelWithinRange = null;
/**
* What is the scoring penalty for a mismatch?
*/
public Integer mismatchPenalty = null;
/**
* What is the scoring penalty for a gap open?
*/
public Integer gapOpenPenalty = null;
/**
* What is the scoring penalty for a gap extension?
*/
public Integer gapExtensionPenalty = null;
/**
* Create a new BWA configuration file using the given prefix.
* @param prefix Prefix to use when creating the configuration. Must not be null.
*/
public BWACConfiguration(String prefix) {
if(prefix == null)
throw new StingException("Prefix must not be null.");
annFileName = prefix + ".ann";
ambFileName = prefix + ".amb";
pacFileName = prefix + ".pac";
forwardBWTFileName = prefix + ".bwt";
forwardSAFileName = prefix + ".sa";
reverseBWTFileName = prefix + ".rbwt";
reverseSAFileName = prefix + ".rsa";
}
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.alignment.bwa.java;
import org.broadinstitute.sting.alignment.Aligner;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.java.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.java.BWAJavaAligner;
import org.broadinstitute.sting.alignment.bwa.java.BWAAlignment;
import org.broadinstitute.sting.alignment.bwa.java.AlignmentState;
import org.broadinstitute.sting.utils.StingException;
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.List;
import java.util.Iterator;
import net.sf.samtools.*;
@ -39,7 +39,7 @@ public class AlignerTestHarness {
}
private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
Aligner aligner = new BWAAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
Aligner aligner = new BWAJavaAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
int count = 0;
SAMFileReader reader = new SAMFileReader(bamFile);
@ -79,21 +79,24 @@ public class AlignerTestHarness {
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
List<Alignment> alignments = aligner.align(alignmentCleaned);
if(alignments.size() == 0 ) {
Iterator<Alignment[]> alignments = aligner.getAllAlignments(alignmentCleaned.getReadBases());
if(!alignments.hasNext() ) {
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
failures++;
}
Alignment foundAlignment = null;
for( Alignment alignment: alignments ) {
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
continue;
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
continue;
while(alignments.hasNext()) {
Alignment[] alignmentsOfQuality = alignments.next();
for(Alignment alignment: alignmentsOfQuality) {
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
continue;
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
continue;
foundAlignment = alignment;
foundAlignment = alignment;
}
}
if( foundAlignment != null ) {
@ -113,18 +116,22 @@ public class AlignerTestHarness {
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
for( Alignment alignment: alignments ) {
System.out.println();
Iterator<Alignment[]> alignmentsToOutput = aligner.getAllAlignments(alignmentCleaned.getReadBases());
while(alignmentsToOutput.hasNext()) {
Alignment[] alignmentsOfQuality = alignments.next();
for(Alignment alignment: alignmentsOfQuality) {
System.out.println();
Cigar cigar = ((BWAAlignment)alignment).getCigar();
Cigar cigar = ((BWAAlignment)alignment).getCigar();
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
alignment.getAlignmentStart(),
alignment.isNegativeStrand());
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
alignment.getAlignmentStart(),
alignment.isNegativeStrand());
}
}
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment.bwa.java;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.java.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.java.BWAJavaAligner;
import org.broadinstitute.sting.alignment.bwa.java.AlignmentMatchSequence;
import org.broadinstitute.sting.alignment.bwa.java.AlignmentState;
import org.broadinstitute.sting.utils.StingException;
@ -28,7 +28,7 @@ public class BWAAlignment extends Alignment implements Cloneable {
/**
* The aligner performing the alignments.
*/
protected BWAAligner aligner;
protected BWAJavaAligner aligner;
/**
* The sequence of matches/mismatches/insertions/deletions.
@ -136,7 +136,7 @@ public class BWAAlignment extends Alignment implements Cloneable {
* Create a new alignment with the given parent aligner.
* @param aligner Aligner being used.
*/
public BWAAlignment( BWAAligner aligner ) {
public BWAAlignment( BWAJavaAligner aligner ) {
this.aligner = aligner;
this.creationNumber = numCreated++;
}

View File

@ -12,6 +12,7 @@ import java.io.File;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
/**
* Create imperfect alignments from the read to the genome represented by the given BWT / suffix array.
@ -19,7 +20,7 @@ import net.sf.samtools.SAMRecord;
* @author mhanna
* @version 0.1
*/
public class BWAAligner implements Aligner {
public class BWAJavaAligner implements Aligner {
/**
* BWT in the forward direction.
*/
@ -75,13 +76,49 @@ public class BWAAligner implements Aligner {
*/
public final int INDEL_END_SKIP = 5;
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
forwardBWT = new BWTReader(forwardBWTFile).read();
reverseBWT = new BWTReader(reverseBWTFile).read();
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile,reverseBWT).read();
}
/**
* Close this instance of the BWA pointer and delete its resources.
*/
public void close() { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
/**
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
* @param bases Bases to align.
* @return An align
*/
public Alignment getBestAlignment(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
/**
* Align the read to the reference.
* @param read Read to align.
* @param header Optional header to drop in place.
* @return A list of the alignments.
*/
public SAMRecord align(final SAMRecord read, final SAMFileHeader header) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
/**
* Get a iterator of alignments, batched by mapping quality.
* @param bases List of bases.
* @return Iterator to alignments.
*/
public Iterator<Alignment[]> getAllAlignments(final byte[] bases) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
/**
* Get a iterator of aligned reads, batched by mapping quality.
* @param read Read to align.
* @param newHeader Optional new header to use when aligning the read. If present, it must be null.
* @return Iterator to alignments.
*/
public Iterator<SAMRecord[]> alignAll(final SAMRecord read, final SAMFileHeader newHeader) { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
public List<Alignment> align( SAMRecord read ) {
List<Alignment> successfulMatches = new ArrayList<Alignment>();