Support for ad-hoc reference sequences. Also reenabled BWA/Java integration test, which was commented out
and the data backing it up deleted without my knowledge. Unfortunately, since the data was deleted, I had to regenerate the data and a new md5. Hopefully the aligner output is still correct. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2493 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
89f3ee614a
commit
b6ecc9e151
|
|
@ -12,27 +12,27 @@ typedef void (BWA::*int_setter)(int value);
|
|||
typedef void (BWA::*float_setter)(float value);
|
||||
|
||||
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name);
|
||||
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
|
||||
{
|
||||
jstring java_ann = get_configuration_string(env,bwtFiles,"annFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_amb = get_configuration_string(env,bwtFiles,"ambFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_pac = get_configuration_string(env,bwtFiles,"pacFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_forward_bwt = get_configuration_string(env,bwtFiles,"forwardBWTFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_forward_sa = get_configuration_string(env,bwtFiles,"forwardSAFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_reverse_bwt = get_configuration_string(env,bwtFiles,"reverseBWTFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_reverse_sa = get_configuration_string(env,bwtFiles,"reverseSAFileName");
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
jstring java_ann = get_configuration_file(env,bwtFiles,"annFile");
|
||||
if(java_ann == NULL) return 0L;
|
||||
jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile");
|
||||
if(java_amb == NULL) return 0L;
|
||||
jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile");
|
||||
if(java_pac == NULL) return 0L;
|
||||
jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile");
|
||||
if(java_forward_bwt == NULL) return 0L;
|
||||
jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile");
|
||||
if(java_forward_sa == NULL) return 0L;
|
||||
jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile");
|
||||
if(java_reverse_bwt == NULL) return 0L;
|
||||
jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile");
|
||||
if(java_reverse_sa == NULL) return 0L;
|
||||
|
||||
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
|
||||
if(env->ExceptionCheck()) return 0L;
|
||||
|
|
@ -332,16 +332,29 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c
|
|||
return java_alignment;
|
||||
}
|
||||
|
||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||
jclass configuration_class = env->GetObjectClass(configuration);
|
||||
if(configuration_class == NULL) return NULL;
|
||||
|
||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/String;");
|
||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;");
|
||||
if(configuration_field == NULL) return NULL;
|
||||
|
||||
jstring result = (jstring)env->GetObjectField(configuration,configuration_field);
|
||||
env->DeleteLocalRef(configuration_class);
|
||||
return result;
|
||||
jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field);
|
||||
|
||||
jclass file_class = env->FindClass("java/io/File");
|
||||
if(file_class == NULL) return NULL;
|
||||
|
||||
jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;");
|
||||
if(path_extractor == NULL) return NULL;
|
||||
|
||||
jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor);
|
||||
if(path == NULL) return NULL;
|
||||
|
||||
env->DeleteLocalRef(configuration_class);
|
||||
env->DeleteLocalRef(file_class);
|
||||
env->DeleteLocalRef(configuration_file);
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.alignment.Aligner;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Align reads using BWA.
|
||||
|
|
@ -14,6 +9,15 @@ import java.util.Iterator;
|
|||
* @version 0.1
|
||||
*/
|
||||
public abstract class BWAAligner implements Aligner {
|
||||
/**
|
||||
* The supporting files used by BWA.
|
||||
*/
|
||||
protected BWTFiles bwtFiles;
|
||||
|
||||
/**
|
||||
* The current configuration for the BWA aligner.
|
||||
*/
|
||||
protected BWAConfiguration configuration;
|
||||
|
||||
/**
|
||||
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
|
||||
|
|
@ -21,7 +25,17 @@ public abstract class BWAAligner implements Aligner {
|
|||
* @param bwtFiles The many files representing BWTs persisted to disk.
|
||||
* @param configuration Configuration parameters for the alignment.
|
||||
*/
|
||||
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { }
|
||||
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||
this.bwtFiles = bwtFiles;
|
||||
this.configuration = configuration;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the existing aligner, freeing resources it consumed.
|
||||
*/
|
||||
public void close() {
|
||||
bwtFiles.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the configuration passed to the BWA aligner.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,19 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArray;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArrayWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.ANNWriter;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.AMBWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
/**
|
||||
* Support files for BWT.
|
||||
|
|
@ -12,37 +25,42 @@ public class BWTFiles {
|
|||
/**
|
||||
* ANN (?) file name.
|
||||
*/
|
||||
public final String annFileName;
|
||||
public final File annFile;
|
||||
|
||||
/**
|
||||
* AMB (?) file name.
|
||||
*/
|
||||
public final String ambFileName;
|
||||
public final File ambFile;
|
||||
|
||||
/**
|
||||
* Packed reference sequence file.
|
||||
*/
|
||||
public final String pacFileName;
|
||||
public final File pacFile;
|
||||
|
||||
/**
|
||||
* Forward BWT file.
|
||||
*/
|
||||
public final String forwardBWTFileName;
|
||||
public final File forwardBWTFile;
|
||||
|
||||
/**
|
||||
* Forward suffix array file.
|
||||
*/
|
||||
public final String forwardSAFileName;
|
||||
public final File forwardSAFile;
|
||||
|
||||
/**
|
||||
* Reverse BWT file.
|
||||
*/
|
||||
public final String reverseBWTFileName;
|
||||
public final File reverseBWTFile;
|
||||
|
||||
/**
|
||||
* Reverse suffix array file.
|
||||
*/
|
||||
public final String reverseSAFileName;
|
||||
public final File reverseSAFile;
|
||||
|
||||
/**
|
||||
* Where these files autogenerated on the fly?
|
||||
*/
|
||||
private final boolean autogenerated;
|
||||
|
||||
/**
|
||||
* Create a new BWA configuration file using the given prefix.
|
||||
|
|
@ -51,12 +69,143 @@ public class BWTFiles {
|
|||
public BWTFiles(String prefix) {
|
||||
if(prefix == null)
|
||||
throw new StingException("Prefix must not be null.");
|
||||
annFileName = prefix + ".ann";
|
||||
ambFileName = prefix + ".amb";
|
||||
pacFileName = prefix + ".pac";
|
||||
forwardBWTFileName = prefix + ".bwt";
|
||||
forwardSAFileName = prefix + ".sa";
|
||||
reverseBWTFileName = prefix + ".rbwt";
|
||||
reverseSAFileName = prefix + ".rsa";
|
||||
annFile = new File(prefix + ".ann");
|
||||
ambFile = new File(prefix + ".amb");
|
||||
pacFile = new File(prefix + ".pac");
|
||||
forwardBWTFile = new File(prefix + ".bwt");
|
||||
forwardSAFile = new File(prefix + ".sa");
|
||||
reverseBWTFile = new File(prefix + ".rbwt");
|
||||
reverseSAFile = new File(prefix + ".rsa");
|
||||
autogenerated = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hand-create a new BWTFiles object, specifying a unique file object for each type.
|
||||
* @param annFile ANN (alternate dictionary) file.
|
||||
* @param ambFile AMB (holes) files.
|
||||
* @param pacFile Packed representation of the forward reference sequence.
|
||||
* @param forwardBWTFile BWT representation of the forward reference sequence.
|
||||
* @param forwardSAFile SA representation of the forward reference sequence.
|
||||
* @param reverseBWTFile BWT representation of the reversed reference sequence.
|
||||
* @param reverseSAFile SA representation of the reversed reference sequence.
|
||||
*/
|
||||
private BWTFiles(File annFile,
|
||||
File ambFile,
|
||||
File pacFile,
|
||||
File forwardBWTFile,
|
||||
File forwardSAFile,
|
||||
File reverseBWTFile,
|
||||
File reverseSAFile) {
|
||||
this.annFile = annFile;
|
||||
this.ambFile = ambFile;
|
||||
this.pacFile = pacFile;
|
||||
this.forwardBWTFile = forwardBWTFile;
|
||||
this.forwardSAFile = forwardSAFile;
|
||||
this.reverseBWTFile = reverseBWTFile;
|
||||
this.reverseSAFile = reverseSAFile;
|
||||
autogenerated = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close out this files object, in the process deleting any temporary filse
|
||||
* that were created.
|
||||
*/
|
||||
public void close() {
|
||||
if(autogenerated) {
|
||||
boolean success = true;
|
||||
success = annFile.delete();
|
||||
success &= ambFile.delete();
|
||||
success &= pacFile.delete();
|
||||
success &= forwardBWTFile.delete();
|
||||
success &= forwardSAFile.delete();
|
||||
success &= reverseBWTFile.delete();
|
||||
success &= reverseSAFile.delete();
|
||||
|
||||
if(!success)
|
||||
throw new StingException("Unable to clean up autogenerated representation");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new set of BWT files from the given reference sequence.
|
||||
* @param referenceSequence Sequence from which to build metadata.
|
||||
* @return A new object representing encoded representations of each sequence.
|
||||
*/
|
||||
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||
try {
|
||||
// Write the ann and amb for this reference sequence.
|
||||
annFile = File.createTempFile("bwt","ann");
|
||||
ambFile = File.createTempFile("bwt","amb");
|
||||
|
||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||
dictionary.addSequence(new SAMSequenceRecord("autogenerated",referenceSequence.length));
|
||||
|
||||
ANNWriter annWriter = new ANNWriter(annFile);
|
||||
annWriter.write(dictionary);
|
||||
annWriter.close();
|
||||
|
||||
AMBWriter ambWriter = new AMBWriter(ambFile);
|
||||
ambWriter.writeEmpty(dictionary);
|
||||
ambWriter.close();
|
||||
|
||||
// Write the encoded files for the forward version of this reference sequence.
|
||||
pacFile = File.createTempFile("bwt","pac");
|
||||
bwtFile = File.createTempFile("bwt","bwt");
|
||||
saFile = File.createTempFile("bwt","sa");
|
||||
|
||||
writeEncodedReferenceSequence(referenceSequence,pacFile,bwtFile,saFile);
|
||||
|
||||
// Write the encoded files for the reverse version of this reference sequence.
|
||||
byte[] reverseReferenceSequence = new byte[referenceSequence.length];
|
||||
System.arraycopy(referenceSequence,0,reverseReferenceSequence,0,referenceSequence.length);
|
||||
|
||||
rpacFile = File.createTempFile("bwt","rpac");
|
||||
rbwtFile = File.createTempFile("bwt","rbwt");
|
||||
rsaFile = File.createTempFile("bwt","rsa");
|
||||
|
||||
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to write autogenerated reference sequence to temporary files");
|
||||
}
|
||||
|
||||
// Make sure that, at the very least, all temporary files are deleted on exit.
|
||||
annFile.deleteOnExit();
|
||||
ambFile.deleteOnExit();
|
||||
pacFile.deleteOnExit();
|
||||
bwtFile.deleteOnExit();
|
||||
saFile.deleteOnExit();
|
||||
rpacFile.deleteOnExit();
|
||||
rbwtFile.deleteOnExit();
|
||||
rsaFile.deleteOnExit();
|
||||
|
||||
return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rbwtFile,rsaFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
|
||||
* sequence is the reference itself in PAC format, the BWT, and the suffix array.
|
||||
* @param referenceSequence The reference sequence to encode.
|
||||
* @param pacFile Target for the PAC-encoded reference.
|
||||
* @param bwtFile Target for the BWT representation of the reference.
|
||||
* @param suffixArrayFile Target for the suffix array encoding of the reference.
|
||||
* @throws java.io.IOException In case of issues writing to the file.
|
||||
*/
|
||||
private static void writeEncodedReferenceSequence(byte[] referenceSequence,
|
||||
File pacFile,
|
||||
File bwtFile,
|
||||
File suffixArrayFile) throws IOException {
|
||||
PackUtils.writeReferenceSequence(pacFile,referenceSequence);
|
||||
|
||||
BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
|
||||
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||
bwtWriter.write(bwt);
|
||||
bwtWriter.close();
|
||||
|
||||
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
||||
suffixArrayWriter.write(suffixArray);
|
||||
suffixArrayWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,14 +3,12 @@ package org.broadinstitute.sting.alignment.bwa.c;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* An aligner using the BWA/C implementation.
|
||||
|
|
@ -33,17 +31,26 @@ public class BWACAligner extends BWAAligner {
|
|||
if(thunkPointer != 0)
|
||||
throw new StingException("BWA/C attempting to reinitialize.");
|
||||
|
||||
if(!new File(bwtFiles.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!new File(bwtFiles.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.annFile.exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.ambFile.exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.pacFile.exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.forwardBWTFile.exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.forwardSAFile.exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.reverseBWTFile.exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
if(!bwtFiles.reverseSAFile.exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||
|
||||
thunkPointer = create(bwtFiles,configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an aligner object using an array of bytes as a reference.
|
||||
* @param referenceSequence Reference sequence to encode ad-hoc.
|
||||
* @param configuration Configuration for the given aligner.
|
||||
*/
|
||||
public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) {
|
||||
this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the configuration passed to the BWA aligner.
|
||||
* @param configuration New configuration to set.
|
||||
|
|
@ -63,6 +70,7 @@ public class BWACAligner extends BWAAligner {
|
|||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
||||
destroy(thunkPointer);
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.java;
|
||||
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
||||
import org.broadinstitute.sting.alignment.bwa.java.LowerBound;
|
||||
import org.broadinstitute.sting.alignment.bwa.java.BWAAlignment;
|
||||
import org.broadinstitute.sting.alignment.bwa.java.AlignmentState;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.alignment.Aligner;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -20,7 +18,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BWAJavaAligner implements Aligner {
|
||||
public class BWAJavaAligner extends BWAAligner {
|
||||
/**
|
||||
* BWT in the forward direction.
|
||||
*/
|
||||
|
|
@ -77,6 +75,7 @@ public class BWAJavaAligner implements Aligner {
|
|||
public final int INDEL_END_SKIP = 5;
|
||||
|
||||
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||
super(null,null);
|
||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
|
||||
|
|
@ -86,7 +85,18 @@ public class BWAJavaAligner implements Aligner {
|
|||
/**
|
||||
* Close this instance of the BWA pointer and delete its resources.
|
||||
*/
|
||||
public void close() { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
||||
@Override
|
||||
public void close() {
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the current parameters of this aligner.
|
||||
* @param configuration New configuration to set.
|
||||
*/
|
||||
public void updateConfiguration(BWAConfiguration configuration) {
|
||||
throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||
|
|
|
|||
|
|
@ -98,6 +98,25 @@ public class BWT {
|
|||
return counts.getTotal();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new BWT from the given reference sequence.
|
||||
* @param referenceSequence Sequence from which to derive the BWT.
|
||||
* @return reference sequence-derived BWT.
|
||||
*/
|
||||
public static BWT createFromReferenceSequence(byte[] referenceSequence) {
|
||||
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||
|
||||
byte[] bwt = new byte[(int)suffixArray.length()-1];
|
||||
int bwtIndex = 0;
|
||||
for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) {
|
||||
if(suffixArray.get(suffixArrayIndex) == 0)
|
||||
continue;
|
||||
bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1];
|
||||
}
|
||||
|
||||
return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the base at a given position in the BWT.
|
||||
* @param index The index to use.
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ public class BWTWriter {
|
|||
private final OutputStream outputStream;
|
||||
|
||||
/**
|
||||
* Create a new BWT reader.
|
||||
* @param inputFile File in which the BWT is stored.
|
||||
* Create a new BWT writer.
|
||||
* @param outputFile File in which the BWT is stored.
|
||||
*/
|
||||
public BWTWriter( File inputFile ) {
|
||||
public BWTWriter( File outputFile ) {
|
||||
try {
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(inputFile));
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new StingException("Unable to open output file", ex);
|
||||
|
|
|
|||
|
|
@ -28,11 +28,8 @@ package org.broadinstitute.sting.alignment.reference.bwt;
|
|||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
|
|
@ -44,45 +41,29 @@ import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class CreateBWTFromReference {
|
||||
private String loadReference( File inputFile ) {
|
||||
private byte[] loadReference( File inputFile ) {
|
||||
// Read in the first sequence in the input file
|
||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||
ReferenceSequence sequence = reference.nextSequence();
|
||||
return StringUtil.bytesToString(sequence.getBases());
|
||||
return sequence.getBases();
|
||||
}
|
||||
|
||||
private String loadReverseReference( File inputFile ) {
|
||||
private byte[] loadReverseReference( File inputFile ) {
|
||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||
ReferenceSequence sequence = reference.nextSequence();
|
||||
PackUtils.reverse(sequence.getBases());
|
||||
return StringUtil.bytesToString(sequence.getBases());
|
||||
return sequence.getBases();
|
||||
}
|
||||
|
||||
private Counts countOccurrences( String sequence ) {
|
||||
private Counts countOccurrences( byte[] sequence ) {
|
||||
Counts occurrences = new Counts();
|
||||
for( char base: sequence.toCharArray() )
|
||||
occurrences.increment((byte)base);
|
||||
for( byte base: sequence )
|
||||
occurrences.increment(base);
|
||||
return occurrences;
|
||||
}
|
||||
|
||||
private long[] createSuffixArray( String sequence ) {
|
||||
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>( new SuffixArrayComparator(sequence) );
|
||||
|
||||
// Build out the suffix array using a custom comparator.
|
||||
System.out.printf("Creating sequence array of length %d%n", sequence.length() );
|
||||
for( int i = 0; i <= sequence.length(); i++ ) {
|
||||
suffixArrayBuilder.add(i);
|
||||
if( i % 100000 == 0 )
|
||||
System.out.printf("Added sequence %d%n", i);
|
||||
}
|
||||
|
||||
// Copy the suffix array into an int array.
|
||||
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||
int i = 0;
|
||||
for( Integer element: suffixArrayBuilder )
|
||||
suffixArray[i++] = element;
|
||||
|
||||
return suffixArray;
|
||||
private long[] createSuffixArray( byte[] sequence ) {
|
||||
return SuffixArray.createFromReferenceSequence(sequence).sequence;
|
||||
}
|
||||
|
||||
private long[] invertSuffixArray( long[] suffixArray ) {
|
||||
|
|
@ -107,17 +88,6 @@ public class CreateBWTFromReference {
|
|||
return inverseCompressedSuffixArray;
|
||||
}
|
||||
|
||||
private byte[] createBWT( String sequence, long[] suffixArray ) {
|
||||
byte[] bwt = new byte[suffixArray.length-1];
|
||||
int i = 0;
|
||||
for( long suffixArrayEntry: suffixArray ) {
|
||||
if( suffixArrayEntry == 0 )
|
||||
continue;
|
||||
bwt[i++] = (byte)sequence.charAt((int)suffixArrayEntry-1);
|
||||
}
|
||||
return bwt;
|
||||
}
|
||||
|
||||
public static void main( String argv[] ) throws IOException {
|
||||
if( argv.length != 5 ) {
|
||||
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output rbwt> <output sa> <output rsa>");
|
||||
|
|
@ -141,8 +111,8 @@ public class CreateBWTFromReference {
|
|||
|
||||
CreateBWTFromReference creator = new CreateBWTFromReference();
|
||||
|
||||
String sequence = creator.loadReference(inputFile);
|
||||
String reverseSequence = creator.loadReverseReference(inputFile);
|
||||
byte[] sequence = creator.loadReference(inputFile);
|
||||
byte[] reverseSequence = creator.loadReverseReference(inputFile);
|
||||
|
||||
// Count the occurences of each given base.
|
||||
Counts occurrences = creator.countOccurrences(sequence);
|
||||
|
|
@ -162,13 +132,6 @@ public class CreateBWTFromReference {
|
|||
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
||||
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
||||
|
||||
/*
|
||||
for( int i = 0; i < 8; i++ )
|
||||
System.out.printf("suffixArray[%d] = %d (%s...)%n", i, suffixArray.sequence[i], sequence.substring(suffixArray.sequence[i],Math.min(suffixArray.sequence[i]+100,sequence.length())));
|
||||
for( int i = 0; i < 8; i++ )
|
||||
System.out.printf("inverseSuffixArray[%d] = %d (%s...)%n", i, inverseSuffixArray[i], sequence.substring(i,Math.min(i+100,sequence.length())));
|
||||
*/
|
||||
|
||||
/*
|
||||
// Create the data structure for the compressed suffix array and print diagnostics.
|
||||
int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray);
|
||||
|
|
@ -186,8 +149,8 @@ public class CreateBWTFromReference {
|
|||
*/
|
||||
|
||||
// Create the BWT.
|
||||
BWT bwt = new BWT(inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence));
|
||||
BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence));
|
||||
BWT bwt = BWT.createFromReferenceSequence(sequence);
|
||||
BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence);
|
||||
|
||||
byte[] bwtSequence = bwt.getSequence();
|
||||
System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length());
|
||||
|
|
@ -234,36 +197,4 @@ public class CreateBWTFromReference {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
||||
* first in lexicographic order.
|
||||
*/
|
||||
public static class SuffixArrayComparator implements Comparator<Integer> {
|
||||
/**
|
||||
* The data source for all suffix arrays.
|
||||
*/
|
||||
private final String sequence;
|
||||
|
||||
/**
|
||||
* Create a new comparator.
|
||||
* @param sequence Reference sequence to use as basis for comparison.
|
||||
*/
|
||||
public SuffixArrayComparator( String sequence ) {
|
||||
this.sequence = sequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
||||
* the two substrings sequence[lhs:], sequence[rhs:].
|
||||
* @param lhs Left-hand side of comparison.
|
||||
* @param rhs Right-hand side of comparison.
|
||||
* @return How the suffix arrays represented by lhs, rhs compare.
|
||||
*/
|
||||
public int compare( Integer lhs, Integer rhs ) {
|
||||
String lhsSuffixArray = sequence.substring(lhs);
|
||||
String rhsSuffixArray = sequence.substring(rhs);
|
||||
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@ package org.broadinstitute.sting.alignment.reference.bwt;
|
|||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* An in-memory representation of a suffix array.
|
||||
*
|
||||
|
|
@ -81,4 +86,74 @@ public class SuffixArray {
|
|||
}
|
||||
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a suffix array from a given reference sequence.
|
||||
* @param sequence The reference sequence to use when building the suffix array.
|
||||
* @return a constructed suffix array.
|
||||
*/
|
||||
public static SuffixArray createFromReferenceSequence(byte[] sequence) {
|
||||
// The builder for the suffix array. Use an integer in this case because
|
||||
// Java arrays can only hold an integer.
|
||||
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>(new SuffixArrayComparator(sequence));
|
||||
|
||||
Counts occurrences = new Counts();
|
||||
for( byte base: sequence )
|
||||
occurrences.increment(base);
|
||||
|
||||
// Build out the suffix array using a custom comparator.
|
||||
for( int i = 0; i <= sequence.length; i++ )
|
||||
suffixArrayBuilder.add(i);
|
||||
|
||||
// Copy the suffix array into an array.
|
||||
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||
int i = 0;
|
||||
for( Integer element: suffixArrayBuilder )
|
||||
suffixArray[i++] = element;
|
||||
|
||||
// Find the first element in the inverse suffix array.
|
||||
long inverseSA0 = -1;
|
||||
for(i = 0; i < sequence.length; i++) {
|
||||
if(suffixArray[i] == 0)
|
||||
inverseSA0 = i;
|
||||
}
|
||||
if(inverseSA0 < 0)
|
||||
throw new StingException("Unable to find first inverse SA entry in generated suffix array.");
|
||||
|
||||
return new SuffixArray(inverseSA0,occurrences,suffixArray);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
||||
* first in lexicographic order.
|
||||
*/
|
||||
private static class SuffixArrayComparator implements Comparator<Integer> {
|
||||
/**
|
||||
* The data source for all suffix arrays.
|
||||
*/
|
||||
private final String sequence;
|
||||
|
||||
/**
|
||||
* Create a new comparator.
|
||||
* @param sequence Reference sequence to use as basis for comparison.
|
||||
*/
|
||||
public SuffixArrayComparator( byte[] sequence ) {
|
||||
// Processing the suffix array tends to be easier as a string.
|
||||
this.sequence = StringUtil.bytesToString(sequence);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
||||
* the two substrings sequence[lhs:], sequence[rhs:].
|
||||
* @param lhs Left-hand side of comparison.
|
||||
* @param rhs Right-hand side of comparison.
|
||||
* @return How the suffix arrays represented by lhs, rhs compare.
|
||||
*/
|
||||
public int compare( Integer lhs, Integer rhs ) {
|
||||
String lhsSuffixArray = sequence.substring(lhs);
|
||||
String rhsSuffixArray = sequence.substring(rhs);
|
||||
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@ public class SuffixArrayWriter {
|
|||
|
||||
/**
|
||||
* Create a new suffix array reader.
|
||||
* @param inputFile File in which the suffix array is stored.
|
||||
* @param outputFile File in which the suffix array is stored.
|
||||
*/
|
||||
public SuffixArrayWriter( File inputFile ) {
|
||||
public SuffixArrayWriter( File outputFile ) {
|
||||
try {
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(inputFile));
|
||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||
}
|
||||
catch( FileNotFoundException ex ) {
|
||||
throw new StingException("Unable to open input file", ex);
|
||||
|
|
|
|||
|
|
@ -53,23 +53,12 @@ public class CreatePACFromReference {
|
|||
ReferenceSequence sequence = reference.nextSequence();
|
||||
|
||||
// Target file for output
|
||||
writeSequence( new File(argv[1]), sequence.getBases() );
|
||||
PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() );
|
||||
|
||||
// Reverse the bases in the reference
|
||||
PackUtils.reverse(sequence.getBases());
|
||||
|
||||
// Target file for output
|
||||
writeSequence( new File(argv[2]), sequence.getBases() );
|
||||
}
|
||||
|
||||
private static void writeSequence( File outputFile, byte[] bases ) throws IOException {
|
||||
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||
|
||||
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||
basePackedOutputStream.write(bases);
|
||||
|
||||
outputStream.write(bases.length%PackUtils.ALPHABET_SIZE);
|
||||
|
||||
outputStream.close();
|
||||
PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,12 @@ package org.broadinstitute.sting.alignment.reference.packing;
|
|||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Utilities designed for packing / unpacking bases.
|
||||
*
|
||||
|
|
@ -24,6 +30,24 @@ public class PackUtils {
|
|||
*/
|
||||
public static final int BITS_PER_BYTE = 8;
|
||||
|
||||
/**
|
||||
* Writes a reference sequence to a PAC file.
|
||||
* @param outputFile Filename for the PAC file.
|
||||
* @param referenceSequence Reference sequence to write.
|
||||
* @throws IOException If there's a problem writing to the output file.
|
||||
*/
|
||||
public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException {
|
||||
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||
|
||||
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||
basePackedOutputStream.write(referenceSequence);
|
||||
|
||||
outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE);
|
||||
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* How many bits can a given type hold?
|
||||
* @param type Type to test.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ import java.io.File;
|
|||
public class AlignerIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testBasicAlignment() {
|
||||
String md5 = "ed098018ffcbd055bee966466044428a";
|
||||
String md5 = "c6d95d8ae707e78fefdaa7375f130995";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /humgen/gsa-scr1/GATK_Data/bwa/human_b36_both.fasta" +
|
||||
" -T Align" +
|
||||
|
|
@ -24,6 +24,6 @@ public class AlignerIntegrationTest extends WalkerTest {
|
|||
" -ob %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
//executeTest("testBasicAlignment", spec);
|
||||
executeTest("testBasicAlignment", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue