Support for ad-hoc reference sequences. Also reenabled BWA/Java integration test, which was commented out
and the data backing it up deleted without my knowledge. Unfortunately, since the data was deleted, I had to regenerate the data and a new md5. Hopefully the aligner output is still correct. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2493 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
89f3ee614a
commit
b6ecc9e151
|
|
@ -12,27 +12,27 @@ typedef void (BWA::*int_setter)(int value);
|
||||||
typedef void (BWA::*float_setter)(float value);
|
typedef void (BWA::*float_setter)(float value);
|
||||||
|
|
||||||
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
|
||||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name);
|
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
|
||||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
|
||||||
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
|
||||||
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);
|
||||||
|
|
||||||
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
|
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_create(JNIEnv* env, jobject instance, jobject bwtFiles, jobject configuration)
|
||||||
{
|
{
|
||||||
jstring java_ann = get_configuration_string(env,bwtFiles,"annFileName");
|
jstring java_ann = get_configuration_file(env,bwtFiles,"annFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_ann == NULL) return 0L;
|
||||||
jstring java_amb = get_configuration_string(env,bwtFiles,"ambFileName");
|
jstring java_amb = get_configuration_file(env,bwtFiles,"ambFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_amb == NULL) return 0L;
|
||||||
jstring java_pac = get_configuration_string(env,bwtFiles,"pacFileName");
|
jstring java_pac = get_configuration_file(env,bwtFiles,"pacFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_pac == NULL) return 0L;
|
||||||
jstring java_forward_bwt = get_configuration_string(env,bwtFiles,"forwardBWTFileName");
|
jstring java_forward_bwt = get_configuration_file(env,bwtFiles,"forwardBWTFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_forward_bwt == NULL) return 0L;
|
||||||
jstring java_forward_sa = get_configuration_string(env,bwtFiles,"forwardSAFileName");
|
jstring java_forward_sa = get_configuration_file(env,bwtFiles,"forwardSAFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_forward_sa == NULL) return 0L;
|
||||||
jstring java_reverse_bwt = get_configuration_string(env,bwtFiles,"reverseBWTFileName");
|
jstring java_reverse_bwt = get_configuration_file(env,bwtFiles,"reverseBWTFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_reverse_bwt == NULL) return 0L;
|
||||||
jstring java_reverse_sa = get_configuration_string(env,bwtFiles,"reverseSAFileName");
|
jstring java_reverse_sa = get_configuration_file(env,bwtFiles,"reverseSAFile");
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(java_reverse_sa == NULL) return 0L;
|
||||||
|
|
||||||
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
|
const char* ann_filename = env->GetStringUTFChars(java_ann,JNI_FALSE);
|
||||||
if(env->ExceptionCheck()) return 0L;
|
if(env->ExceptionCheck()) return 0L;
|
||||||
|
|
@ -332,16 +332,29 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c
|
||||||
return java_alignment;
|
return java_alignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
static jstring get_configuration_string(JNIEnv* env, jobject configuration, const char* field_name) {
|
static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name) {
|
||||||
jclass configuration_class = env->GetObjectClass(configuration);
|
jclass configuration_class = env->GetObjectClass(configuration);
|
||||||
if(configuration_class == NULL) return NULL;
|
if(configuration_class == NULL) return NULL;
|
||||||
|
|
||||||
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/String;");
|
jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/io/File;");
|
||||||
if(configuration_field == NULL) return NULL;
|
if(configuration_field == NULL) return NULL;
|
||||||
|
|
||||||
jstring result = (jstring)env->GetObjectField(configuration,configuration_field);
|
jobject configuration_file = (jobject)env->GetObjectField(configuration,configuration_field);
|
||||||
env->DeleteLocalRef(configuration_class);
|
|
||||||
return result;
|
jclass file_class = env->FindClass("java/io/File");
|
||||||
|
if(file_class == NULL) return NULL;
|
||||||
|
|
||||||
|
jmethodID path_extractor = env->GetMethodID(file_class,"getAbsolutePath", "()Ljava/lang/String;");
|
||||||
|
if(path_extractor == NULL) return NULL;
|
||||||
|
|
||||||
|
jstring path = (jstring)env->CallObjectMethod(configuration_file,path_extractor);
|
||||||
|
if(path == NULL) return NULL;
|
||||||
|
|
||||||
|
env->DeleteLocalRef(configuration_class);
|
||||||
|
env->DeleteLocalRef(file_class);
|
||||||
|
env->DeleteLocalRef(configuration_file);
|
||||||
|
|
||||||
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,6 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa;
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
import org.broadinstitute.sting.alignment.Aligner;
|
import org.broadinstitute.sting.alignment.Aligner;
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Align reads using BWA.
|
* Align reads using BWA.
|
||||||
|
|
@ -14,6 +9,15 @@ import java.util.Iterator;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public abstract class BWAAligner implements Aligner {
|
public abstract class BWAAligner implements Aligner {
|
||||||
|
/**
|
||||||
|
* The supporting files used by BWA.
|
||||||
|
*/
|
||||||
|
protected BWTFiles bwtFiles;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The current configuration for the BWA aligner.
|
||||||
|
*/
|
||||||
|
protected BWAConfiguration configuration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
|
* Create a new BWAAligner. Purpose of this call is to ensure that all BWA constructors accept the correct
|
||||||
|
|
@ -21,7 +25,17 @@ public abstract class BWAAligner implements Aligner {
|
||||||
* @param bwtFiles The many files representing BWTs persisted to disk.
|
* @param bwtFiles The many files representing BWTs persisted to disk.
|
||||||
* @param configuration Configuration parameters for the alignment.
|
* @param configuration Configuration parameters for the alignment.
|
||||||
*/
|
*/
|
||||||
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) { }
|
public BWAAligner(BWTFiles bwtFiles, BWAConfiguration configuration) {
|
||||||
|
this.bwtFiles = bwtFiles;
|
||||||
|
this.configuration = configuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the existing aligner, freeing resources it consumed.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
bwtFiles.close();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the configuration passed to the BWA aligner.
|
* Update the configuration passed to the BWA aligner.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,19 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa;
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArray;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArrayWriter;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.ANNWriter;
|
||||||
|
import org.broadinstitute.sting.alignment.reference.bwt.AMBWriter;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Support files for BWT.
|
* Support files for BWT.
|
||||||
|
|
@ -12,37 +25,42 @@ public class BWTFiles {
|
||||||
/**
|
/**
|
||||||
* ANN (?) file name.
|
* ANN (?) file name.
|
||||||
*/
|
*/
|
||||||
public final String annFileName;
|
public final File annFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AMB (?) file name.
|
* AMB (?) file name.
|
||||||
*/
|
*/
|
||||||
public final String ambFileName;
|
public final File ambFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Packed reference sequence file.
|
* Packed reference sequence file.
|
||||||
*/
|
*/
|
||||||
public final String pacFileName;
|
public final File pacFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forward BWT file.
|
* Forward BWT file.
|
||||||
*/
|
*/
|
||||||
public final String forwardBWTFileName;
|
public final File forwardBWTFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forward suffix array file.
|
* Forward suffix array file.
|
||||||
*/
|
*/
|
||||||
public final String forwardSAFileName;
|
public final File forwardSAFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reverse BWT file.
|
* Reverse BWT file.
|
||||||
*/
|
*/
|
||||||
public final String reverseBWTFileName;
|
public final File reverseBWTFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reverse suffix array file.
|
* Reverse suffix array file.
|
||||||
*/
|
*/
|
||||||
public final String reverseSAFileName;
|
public final File reverseSAFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Where these files autogenerated on the fly?
|
||||||
|
*/
|
||||||
|
private final boolean autogenerated;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new BWA configuration file using the given prefix.
|
* Create a new BWA configuration file using the given prefix.
|
||||||
|
|
@ -51,12 +69,143 @@ public class BWTFiles {
|
||||||
public BWTFiles(String prefix) {
|
public BWTFiles(String prefix) {
|
||||||
if(prefix == null)
|
if(prefix == null)
|
||||||
throw new StingException("Prefix must not be null.");
|
throw new StingException("Prefix must not be null.");
|
||||||
annFileName = prefix + ".ann";
|
annFile = new File(prefix + ".ann");
|
||||||
ambFileName = prefix + ".amb";
|
ambFile = new File(prefix + ".amb");
|
||||||
pacFileName = prefix + ".pac";
|
pacFile = new File(prefix + ".pac");
|
||||||
forwardBWTFileName = prefix + ".bwt";
|
forwardBWTFile = new File(prefix + ".bwt");
|
||||||
forwardSAFileName = prefix + ".sa";
|
forwardSAFile = new File(prefix + ".sa");
|
||||||
reverseBWTFileName = prefix + ".rbwt";
|
reverseBWTFile = new File(prefix + ".rbwt");
|
||||||
reverseSAFileName = prefix + ".rsa";
|
reverseSAFile = new File(prefix + ".rsa");
|
||||||
|
autogenerated = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hand-create a new BWTFiles object, specifying a unique file object for each type.
|
||||||
|
* @param annFile ANN (alternate dictionary) file.
|
||||||
|
* @param ambFile AMB (holes) files.
|
||||||
|
* @param pacFile Packed representation of the forward reference sequence.
|
||||||
|
* @param forwardBWTFile BWT representation of the forward reference sequence.
|
||||||
|
* @param forwardSAFile SA representation of the forward reference sequence.
|
||||||
|
* @param reverseBWTFile BWT representation of the reversed reference sequence.
|
||||||
|
* @param reverseSAFile SA representation of the reversed reference sequence.
|
||||||
|
*/
|
||||||
|
private BWTFiles(File annFile,
|
||||||
|
File ambFile,
|
||||||
|
File pacFile,
|
||||||
|
File forwardBWTFile,
|
||||||
|
File forwardSAFile,
|
||||||
|
File reverseBWTFile,
|
||||||
|
File reverseSAFile) {
|
||||||
|
this.annFile = annFile;
|
||||||
|
this.ambFile = ambFile;
|
||||||
|
this.pacFile = pacFile;
|
||||||
|
this.forwardBWTFile = forwardBWTFile;
|
||||||
|
this.forwardSAFile = forwardSAFile;
|
||||||
|
this.reverseBWTFile = reverseBWTFile;
|
||||||
|
this.reverseSAFile = reverseSAFile;
|
||||||
|
autogenerated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close out this files object, in the process deleting any temporary filse
|
||||||
|
* that were created.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
if(autogenerated) {
|
||||||
|
boolean success = true;
|
||||||
|
success = annFile.delete();
|
||||||
|
success &= ambFile.delete();
|
||||||
|
success &= pacFile.delete();
|
||||||
|
success &= forwardBWTFile.delete();
|
||||||
|
success &= forwardSAFile.delete();
|
||||||
|
success &= reverseBWTFile.delete();
|
||||||
|
success &= reverseSAFile.delete();
|
||||||
|
|
||||||
|
if(!success)
|
||||||
|
throw new StingException("Unable to clean up autogenerated representation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new set of BWT files from the given reference sequence.
|
||||||
|
* @param referenceSequence Sequence from which to build metadata.
|
||||||
|
* @return A new object representing encoded representations of each sequence.
|
||||||
|
*/
|
||||||
|
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||||
|
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||||
|
try {
|
||||||
|
// Write the ann and amb for this reference sequence.
|
||||||
|
annFile = File.createTempFile("bwt","ann");
|
||||||
|
ambFile = File.createTempFile("bwt","amb");
|
||||||
|
|
||||||
|
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||||
|
dictionary.addSequence(new SAMSequenceRecord("autogenerated",referenceSequence.length));
|
||||||
|
|
||||||
|
ANNWriter annWriter = new ANNWriter(annFile);
|
||||||
|
annWriter.write(dictionary);
|
||||||
|
annWriter.close();
|
||||||
|
|
||||||
|
AMBWriter ambWriter = new AMBWriter(ambFile);
|
||||||
|
ambWriter.writeEmpty(dictionary);
|
||||||
|
ambWriter.close();
|
||||||
|
|
||||||
|
// Write the encoded files for the forward version of this reference sequence.
|
||||||
|
pacFile = File.createTempFile("bwt","pac");
|
||||||
|
bwtFile = File.createTempFile("bwt","bwt");
|
||||||
|
saFile = File.createTempFile("bwt","sa");
|
||||||
|
|
||||||
|
writeEncodedReferenceSequence(referenceSequence,pacFile,bwtFile,saFile);
|
||||||
|
|
||||||
|
// Write the encoded files for the reverse version of this reference sequence.
|
||||||
|
byte[] reverseReferenceSequence = new byte[referenceSequence.length];
|
||||||
|
System.arraycopy(referenceSequence,0,reverseReferenceSequence,0,referenceSequence.length);
|
||||||
|
|
||||||
|
rpacFile = File.createTempFile("bwt","rpac");
|
||||||
|
rbwtFile = File.createTempFile("bwt","rbwt");
|
||||||
|
rsaFile = File.createTempFile("bwt","rsa");
|
||||||
|
|
||||||
|
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new StingException("Unable to write autogenerated reference sequence to temporary files");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure that, at the very least, all temporary files are deleted on exit.
|
||||||
|
annFile.deleteOnExit();
|
||||||
|
ambFile.deleteOnExit();
|
||||||
|
pacFile.deleteOnExit();
|
||||||
|
bwtFile.deleteOnExit();
|
||||||
|
saFile.deleteOnExit();
|
||||||
|
rpacFile.deleteOnExit();
|
||||||
|
rbwtFile.deleteOnExit();
|
||||||
|
rsaFile.deleteOnExit();
|
||||||
|
|
||||||
|
return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rbwtFile,rsaFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
|
||||||
|
* sequence is the reference itself in PAC format, the BWT, and the suffix array.
|
||||||
|
* @param referenceSequence The reference sequence to encode.
|
||||||
|
* @param pacFile Target for the PAC-encoded reference.
|
||||||
|
* @param bwtFile Target for the BWT representation of the reference.
|
||||||
|
* @param suffixArrayFile Target for the suffix array encoding of the reference.
|
||||||
|
* @throws java.io.IOException In case of issues writing to the file.
|
||||||
|
*/
|
||||||
|
private static void writeEncodedReferenceSequence(byte[] referenceSequence,
|
||||||
|
File pacFile,
|
||||||
|
File bwtFile,
|
||||||
|
File suffixArrayFile) throws IOException {
|
||||||
|
PackUtils.writeReferenceSequence(pacFile,referenceSequence);
|
||||||
|
|
||||||
|
BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
|
||||||
|
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
||||||
|
bwtWriter.write(bwt);
|
||||||
|
bwtWriter.close();
|
||||||
|
|
||||||
|
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||||
|
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
||||||
|
suffixArrayWriter.write(suffixArray);
|
||||||
|
suffixArrayWriter.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,12 @@ package org.broadinstitute.sting.alignment.bwa.c;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An aligner using the BWA/C implementation.
|
* An aligner using the BWA/C implementation.
|
||||||
|
|
@ -33,17 +31,26 @@ public class BWACAligner extends BWAAligner {
|
||||||
if(thunkPointer != 0)
|
if(thunkPointer != 0)
|
||||||
throw new StingException("BWA/C attempting to reinitialize.");
|
throw new StingException("BWA/C attempting to reinitialize.");
|
||||||
|
|
||||||
if(!new File(bwtFiles.annFileName).exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.annFile.exists()) throw new StingException("ANN file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.ambFileName).exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.ambFile.exists()) throw new StingException("AMB file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.pacFileName).exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.pacFile.exists()) throw new StingException("PAC file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.forwardBWTFileName).exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.forwardBWTFile.exists()) throw new StingException("Forward BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.forwardSAFileName).exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.forwardSAFile.exists()) throw new StingException("Forward SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.reverseBWTFileName).exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.reverseBWTFile.exists()) throw new StingException("Reverse BWT file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
if(!new File(bwtFiles.reverseSAFileName).exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
if(!bwtFiles.reverseSAFile.exists()) throw new StingException("Reverse SA file is missing; please rerun 'bwa aln' to regenerate it.");
|
||||||
|
|
||||||
thunkPointer = create(bwtFiles,configuration);
|
thunkPointer = create(bwtFiles,configuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an aligner object using an array of bytes as a reference.
|
||||||
|
* @param referenceSequence Reference sequence to encode ad-hoc.
|
||||||
|
* @param configuration Configuration for the given aligner.
|
||||||
|
*/
|
||||||
|
public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) {
|
||||||
|
this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the configuration passed to the BWA aligner.
|
* Update the configuration passed to the BWA aligner.
|
||||||
* @param configuration New configuration to set.
|
* @param configuration New configuration to set.
|
||||||
|
|
@ -63,6 +70,7 @@ public class BWACAligner extends BWAAligner {
|
||||||
if(thunkPointer == 0)
|
if(thunkPointer == 0)
|
||||||
throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized.");
|
||||||
destroy(thunkPointer);
|
destroy(thunkPointer);
|
||||||
|
super.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,9 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa.java;
|
package org.broadinstitute.sting.alignment.bwa.java;
|
||||||
|
|
||||||
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
import org.broadinstitute.sting.alignment.reference.bwt.*;
|
||||||
import org.broadinstitute.sting.alignment.bwa.java.LowerBound;
|
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||||
import org.broadinstitute.sting.alignment.bwa.java.BWAAlignment;
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
import org.broadinstitute.sting.alignment.bwa.java.AlignmentState;
|
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
import org.broadinstitute.sting.alignment.Aligner;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -20,7 +18,7 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class BWAJavaAligner implements Aligner {
|
public class BWAJavaAligner extends BWAAligner {
|
||||||
/**
|
/**
|
||||||
* BWT in the forward direction.
|
* BWT in the forward direction.
|
||||||
*/
|
*/
|
||||||
|
|
@ -77,6 +75,7 @@ public class BWAJavaAligner implements Aligner {
|
||||||
public final int INDEL_END_SKIP = 5;
|
public final int INDEL_END_SKIP = 5;
|
||||||
|
|
||||||
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
public BWAJavaAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||||
|
super(null,null);
|
||||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||||
reverseBWT = new BWTReader(reverseBWTFile).read();
|
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||||
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
|
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile,forwardBWT).read();
|
||||||
|
|
@ -86,7 +85,18 @@ public class BWAJavaAligner implements Aligner {
|
||||||
/**
|
/**
|
||||||
* Close this instance of the BWA pointer and delete its resources.
|
* Close this instance of the BWA pointer and delete its resources.
|
||||||
*/
|
*/
|
||||||
public void close() { throw new UnsupportedOperationException("BWAJavaAligner does not yet support the standard Aligner interface."); }
|
@Override
|
||||||
|
public void close() {
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the current parameters of this aligner.
|
||||||
|
* @param configuration New configuration to set.
|
||||||
|
*/
|
||||||
|
public void updateConfiguration(BWAConfiguration configuration) {
|
||||||
|
throw new UnsupportedOperationException("Configuration of the BWA aligner can't currently be changed.");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
* Allow the aligner to choose one alignment randomly from the pile of best alignments.
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,25 @@ public class BWT {
|
||||||
return counts.getTotal();
|
return counts.getTotal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new BWT from the given reference sequence.
|
||||||
|
* @param referenceSequence Sequence from which to derive the BWT.
|
||||||
|
* @return reference sequence-derived BWT.
|
||||||
|
*/
|
||||||
|
public static BWT createFromReferenceSequence(byte[] referenceSequence) {
|
||||||
|
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
||||||
|
|
||||||
|
byte[] bwt = new byte[(int)suffixArray.length()-1];
|
||||||
|
int bwtIndex = 0;
|
||||||
|
for(long suffixArrayIndex = 0; suffixArrayIndex < suffixArray.length(); suffixArrayIndex++) {
|
||||||
|
if(suffixArray.get(suffixArrayIndex) == 0)
|
||||||
|
continue;
|
||||||
|
bwt[bwtIndex++] = referenceSequence[(int)suffixArray.get(suffixArrayIndex)-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BWT(suffixArray.inverseSA0,suffixArray.occurrences,bwt);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the base at a given position in the BWT.
|
* Gets the base at a given position in the BWT.
|
||||||
* @param index The index to use.
|
* @param index The index to use.
|
||||||
|
|
|
||||||
|
|
@ -20,12 +20,12 @@ public class BWTWriter {
|
||||||
private final OutputStream outputStream;
|
private final OutputStream outputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new BWT reader.
|
* Create a new BWT writer.
|
||||||
* @param inputFile File in which the BWT is stored.
|
* @param outputFile File in which the BWT is stored.
|
||||||
*/
|
*/
|
||||||
public BWTWriter( File inputFile ) {
|
public BWTWriter( File outputFile ) {
|
||||||
try {
|
try {
|
||||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(inputFile));
|
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||||
}
|
}
|
||||||
catch( FileNotFoundException ex ) {
|
catch( FileNotFoundException ex ) {
|
||||||
throw new StingException("Unable to open output file", ex);
|
throw new StingException("Unable to open output file", ex);
|
||||||
|
|
|
||||||
|
|
@ -28,11 +28,8 @@ package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import net.sf.samtools.util.StringUtil;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.TreeSet;
|
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
|
|
@ -44,45 +41,29 @@ import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class CreateBWTFromReference {
|
public class CreateBWTFromReference {
|
||||||
private String loadReference( File inputFile ) {
|
private byte[] loadReference( File inputFile ) {
|
||||||
// Read in the first sequence in the input file
|
// Read in the first sequence in the input file
|
||||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||||
ReferenceSequence sequence = reference.nextSequence();
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
return StringUtil.bytesToString(sequence.getBases());
|
return sequence.getBases();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String loadReverseReference( File inputFile ) {
|
private byte[] loadReverseReference( File inputFile ) {
|
||||||
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(inputFile);
|
||||||
ReferenceSequence sequence = reference.nextSequence();
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
PackUtils.reverse(sequence.getBases());
|
PackUtils.reverse(sequence.getBases());
|
||||||
return StringUtil.bytesToString(sequence.getBases());
|
return sequence.getBases();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Counts countOccurrences( String sequence ) {
|
private Counts countOccurrences( byte[] sequence ) {
|
||||||
Counts occurrences = new Counts();
|
Counts occurrences = new Counts();
|
||||||
for( char base: sequence.toCharArray() )
|
for( byte base: sequence )
|
||||||
occurrences.increment((byte)base);
|
occurrences.increment(base);
|
||||||
return occurrences;
|
return occurrences;
|
||||||
}
|
}
|
||||||
|
|
||||||
private long[] createSuffixArray( String sequence ) {
|
private long[] createSuffixArray( byte[] sequence ) {
|
||||||
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>( new SuffixArrayComparator(sequence) );
|
return SuffixArray.createFromReferenceSequence(sequence).sequence;
|
||||||
|
|
||||||
// Build out the suffix array using a custom comparator.
|
|
||||||
System.out.printf("Creating sequence array of length %d%n", sequence.length() );
|
|
||||||
for( int i = 0; i <= sequence.length(); i++ ) {
|
|
||||||
suffixArrayBuilder.add(i);
|
|
||||||
if( i % 100000 == 0 )
|
|
||||||
System.out.printf("Added sequence %d%n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy the suffix array into an int array.
|
|
||||||
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
|
||||||
int i = 0;
|
|
||||||
for( Integer element: suffixArrayBuilder )
|
|
||||||
suffixArray[i++] = element;
|
|
||||||
|
|
||||||
return suffixArray;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private long[] invertSuffixArray( long[] suffixArray ) {
|
private long[] invertSuffixArray( long[] suffixArray ) {
|
||||||
|
|
@ -107,17 +88,6 @@ public class CreateBWTFromReference {
|
||||||
return inverseCompressedSuffixArray;
|
return inverseCompressedSuffixArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] createBWT( String sequence, long[] suffixArray ) {
|
|
||||||
byte[] bwt = new byte[suffixArray.length-1];
|
|
||||||
int i = 0;
|
|
||||||
for( long suffixArrayEntry: suffixArray ) {
|
|
||||||
if( suffixArrayEntry == 0 )
|
|
||||||
continue;
|
|
||||||
bwt[i++] = (byte)sequence.charAt((int)suffixArrayEntry-1);
|
|
||||||
}
|
|
||||||
return bwt;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main( String argv[] ) throws IOException {
|
public static void main( String argv[] ) throws IOException {
|
||||||
if( argv.length != 5 ) {
|
if( argv.length != 5 ) {
|
||||||
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output rbwt> <output sa> <output rsa>");
|
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output rbwt> <output sa> <output rsa>");
|
||||||
|
|
@ -141,8 +111,8 @@ public class CreateBWTFromReference {
|
||||||
|
|
||||||
CreateBWTFromReference creator = new CreateBWTFromReference();
|
CreateBWTFromReference creator = new CreateBWTFromReference();
|
||||||
|
|
||||||
String sequence = creator.loadReference(inputFile);
|
byte[] sequence = creator.loadReference(inputFile);
|
||||||
String reverseSequence = creator.loadReverseReference(inputFile);
|
byte[] reverseSequence = creator.loadReverseReference(inputFile);
|
||||||
|
|
||||||
// Count the occurences of each given base.
|
// Count the occurences of each given base.
|
||||||
Counts occurrences = creator.countOccurrences(sequence);
|
Counts occurrences = creator.countOccurrences(sequence);
|
||||||
|
|
@ -162,13 +132,6 @@ public class CreateBWTFromReference {
|
||||||
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
||||||
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
||||||
|
|
||||||
/*
|
|
||||||
for( int i = 0; i < 8; i++ )
|
|
||||||
System.out.printf("suffixArray[%d] = %d (%s...)%n", i, suffixArray.sequence[i], sequence.substring(suffixArray.sequence[i],Math.min(suffixArray.sequence[i]+100,sequence.length())));
|
|
||||||
for( int i = 0; i < 8; i++ )
|
|
||||||
System.out.printf("inverseSuffixArray[%d] = %d (%s...)%n", i, inverseSuffixArray[i], sequence.substring(i,Math.min(i+100,sequence.length())));
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Create the data structure for the compressed suffix array and print diagnostics.
|
// Create the data structure for the compressed suffix array and print diagnostics.
|
||||||
int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray);
|
int[] compressedSuffixArray = creator.createCompressedSuffixArray(suffixArray.sequence,inverseSuffixArray);
|
||||||
|
|
@ -186,8 +149,8 @@ public class CreateBWTFromReference {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Create the BWT.
|
// Create the BWT.
|
||||||
BWT bwt = new BWT(inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence));
|
BWT bwt = BWT.createFromReferenceSequence(sequence);
|
||||||
BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence));
|
BWT reverseBWT = BWT.createFromReferenceSequence(reverseSequence);
|
||||||
|
|
||||||
byte[] bwtSequence = bwt.getSequence();
|
byte[] bwtSequence = bwt.getSequence();
|
||||||
System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length());
|
System.out.printf("BWT: %s... (length = %d)%n", new String(bwtSequence,0,80),bwt.length());
|
||||||
|
|
@ -234,36 +197,4 @@ public class CreateBWTFromReference {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
|
||||||
* first in lexicographic order.
|
|
||||||
*/
|
|
||||||
public static class SuffixArrayComparator implements Comparator<Integer> {
|
|
||||||
/**
|
|
||||||
* The data source for all suffix arrays.
|
|
||||||
*/
|
|
||||||
private final String sequence;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new comparator.
|
|
||||||
* @param sequence Reference sequence to use as basis for comparison.
|
|
||||||
*/
|
|
||||||
public SuffixArrayComparator( String sequence ) {
|
|
||||||
this.sequence = sequence;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
|
||||||
* the two substrings sequence[lhs:], sequence[rhs:].
|
|
||||||
* @param lhs Left-hand side of comparison.
|
|
||||||
* @param rhs Right-hand side of comparison.
|
|
||||||
* @return How the suffix arrays represented by lhs, rhs compare.
|
|
||||||
*/
|
|
||||||
public int compare( Integer lhs, Integer rhs ) {
|
|
||||||
String lhsSuffixArray = sequence.substring(lhs);
|
|
||||||
String rhsSuffixArray = sequence.substring(rhs);
|
|
||||||
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,11 @@ package org.broadinstitute.sting.alignment.reference.bwt;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An in-memory representation of a suffix array.
|
* An in-memory representation of a suffix array.
|
||||||
*
|
*
|
||||||
|
|
@ -81,4 +86,74 @@ public class SuffixArray {
|
||||||
}
|
}
|
||||||
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a suffix array from a given reference sequence.
|
||||||
|
* @param sequence The reference sequence to use when building the suffix array.
|
||||||
|
* @return a constructed suffix array.
|
||||||
|
*/
|
||||||
|
public static SuffixArray createFromReferenceSequence(byte[] sequence) {
|
||||||
|
// The builder for the suffix array. Use an integer in this case because
|
||||||
|
// Java arrays can only hold an integer.
|
||||||
|
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>(new SuffixArrayComparator(sequence));
|
||||||
|
|
||||||
|
Counts occurrences = new Counts();
|
||||||
|
for( byte base: sequence )
|
||||||
|
occurrences.increment(base);
|
||||||
|
|
||||||
|
// Build out the suffix array using a custom comparator.
|
||||||
|
for( int i = 0; i <= sequence.length; i++ )
|
||||||
|
suffixArrayBuilder.add(i);
|
||||||
|
|
||||||
|
// Copy the suffix array into an array.
|
||||||
|
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||||
|
int i = 0;
|
||||||
|
for( Integer element: suffixArrayBuilder )
|
||||||
|
suffixArray[i++] = element;
|
||||||
|
|
||||||
|
// Find the first element in the inverse suffix array.
|
||||||
|
long inverseSA0 = -1;
|
||||||
|
for(i = 0; i < sequence.length; i++) {
|
||||||
|
if(suffixArray[i] == 0)
|
||||||
|
inverseSA0 = i;
|
||||||
|
}
|
||||||
|
if(inverseSA0 < 0)
|
||||||
|
throw new StingException("Unable to find first inverse SA entry in generated suffix array.");
|
||||||
|
|
||||||
|
return new SuffixArray(inverseSA0,occurrences,suffixArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares two suffix arrays of the given sequence. Will return whichever string appears
|
||||||
|
* first in lexicographic order.
|
||||||
|
*/
|
||||||
|
private static class SuffixArrayComparator implements Comparator<Integer> {
|
||||||
|
/**
|
||||||
|
* The data source for all suffix arrays.
|
||||||
|
*/
|
||||||
|
private final String sequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new comparator.
|
||||||
|
* @param sequence Reference sequence to use as basis for comparison.
|
||||||
|
*/
|
||||||
|
public SuffixArrayComparator( byte[] sequence ) {
|
||||||
|
// Processing the suffix array tends to be easier as a string.
|
||||||
|
this.sequence = StringUtil.bytesToString(sequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare the two given suffix arrays. Criteria for comparison is the lexicographic order of
|
||||||
|
* the two substrings sequence[lhs:], sequence[rhs:].
|
||||||
|
* @param lhs Left-hand side of comparison.
|
||||||
|
* @param rhs Right-hand side of comparison.
|
||||||
|
* @return How the suffix arrays represented by lhs, rhs compare.
|
||||||
|
*/
|
||||||
|
public int compare( Integer lhs, Integer rhs ) {
|
||||||
|
String lhsSuffixArray = sequence.substring(lhs);
|
||||||
|
String rhsSuffixArray = sequence.substring(rhs);
|
||||||
|
return lhsSuffixArray.compareTo(rhsSuffixArray);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,11 +20,11 @@ public class SuffixArrayWriter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new suffix array reader.
|
* Create a new suffix array reader.
|
||||||
* @param inputFile File in which the suffix array is stored.
|
* @param outputFile File in which the suffix array is stored.
|
||||||
*/
|
*/
|
||||||
public SuffixArrayWriter( File inputFile ) {
|
public SuffixArrayWriter( File outputFile ) {
|
||||||
try {
|
try {
|
||||||
this.outputStream = new BufferedOutputStream(new FileOutputStream(inputFile));
|
this.outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||||
}
|
}
|
||||||
catch( FileNotFoundException ex ) {
|
catch( FileNotFoundException ex ) {
|
||||||
throw new StingException("Unable to open input file", ex);
|
throw new StingException("Unable to open input file", ex);
|
||||||
|
|
|
||||||
|
|
@ -53,23 +53,12 @@ public class CreatePACFromReference {
|
||||||
ReferenceSequence sequence = reference.nextSequence();
|
ReferenceSequence sequence = reference.nextSequence();
|
||||||
|
|
||||||
// Target file for output
|
// Target file for output
|
||||||
writeSequence( new File(argv[1]), sequence.getBases() );
|
PackUtils.writeReferenceSequence( new File(argv[1]), sequence.getBases() );
|
||||||
|
|
||||||
// Reverse the bases in the reference
|
// Reverse the bases in the reference
|
||||||
PackUtils.reverse(sequence.getBases());
|
PackUtils.reverse(sequence.getBases());
|
||||||
|
|
||||||
// Target file for output
|
// Target file for output
|
||||||
writeSequence( new File(argv[2]), sequence.getBases() );
|
PackUtils.writeReferenceSequence( new File(argv[2]), sequence.getBases() );
|
||||||
}
|
|
||||||
|
|
||||||
private static void writeSequence( File outputFile, byte[] bases ) throws IOException {
|
|
||||||
OutputStream outputStream = new FileOutputStream(outputFile);
|
|
||||||
|
|
||||||
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
|
||||||
basePackedOutputStream.write(bases);
|
|
||||||
|
|
||||||
outputStream.write(bases.length%PackUtils.ALPHABET_SIZE);
|
|
||||||
|
|
||||||
outputStream.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,12 @@ package org.broadinstitute.sting.alignment.reference.packing;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utilities designed for packing / unpacking bases.
|
* Utilities designed for packing / unpacking bases.
|
||||||
*
|
*
|
||||||
|
|
@ -24,6 +30,24 @@ public class PackUtils {
|
||||||
*/
|
*/
|
||||||
public static final int BITS_PER_BYTE = 8;
|
public static final int BITS_PER_BYTE = 8;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a reference sequence to a PAC file.
|
||||||
|
* @param outputFile Filename for the PAC file.
|
||||||
|
* @param referenceSequence Reference sequence to write.
|
||||||
|
* @throws IOException If there's a problem writing to the output file.
|
||||||
|
*/
|
||||||
|
public static void writeReferenceSequence( File outputFile, byte[] referenceSequence ) throws IOException {
|
||||||
|
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||||
|
|
||||||
|
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||||
|
basePackedOutputStream.write(referenceSequence);
|
||||||
|
|
||||||
|
outputStream.write(referenceSequence.length%PackUtils.ALPHABET_SIZE);
|
||||||
|
|
||||||
|
outputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How many bits can a given type hold?
|
* How many bits can a given type hold?
|
||||||
* @param type Type to test.
|
* @param type Type to test.
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ import java.io.File;
|
||||||
public class AlignerIntegrationTest extends WalkerTest {
|
public class AlignerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBasicAlignment() {
|
public void testBasicAlignment() {
|
||||||
String md5 = "ed098018ffcbd055bee966466044428a";
|
String md5 = "c6d95d8ae707e78fefdaa7375f130995";
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R /humgen/gsa-scr1/GATK_Data/bwa/human_b36_both.fasta" +
|
"-R /humgen/gsa-scr1/GATK_Data/bwa/human_b36_both.fasta" +
|
||||||
" -T Align" +
|
" -T Align" +
|
||||||
|
|
@ -24,6 +24,6 @@ public class AlignerIntegrationTest extends WalkerTest {
|
||||||
" -ob %s",
|
" -ob %s",
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
Arrays.asList(md5));
|
Arrays.asList(md5));
|
||||||
//executeTest("testBasicAlignment", spec);
|
executeTest("testBasicAlignment", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue