Finishing off data transfer conduits for single alignment generator.
Misc bug fixes elsewhere. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2101 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2a5349d886
commit
38a030f2ba
|
|
@ -56,12 +56,18 @@ void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*&
|
||||||
bwa_free_read_seq(1,sequence);
|
bwa_free_read_seq(1,sequence);
|
||||||
}
|
}
|
||||||
|
|
||||||
Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
|
Alignment* BWA::generate_single_alignment(const char* bases, const unsigned read_length) {
|
||||||
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
bwa_seq_t* sequence = create_sequence(bases,read_length);
|
||||||
|
|
||||||
// Calculate paths.
|
// Calculate paths.
|
||||||
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
|
||||||
|
|
||||||
|
// Check for no alignments found and return null.
|
||||||
|
if(sequence->n_aln == 0) {
|
||||||
|
bwa_free_read_seq(1,sequence);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
|
// bwa_cal_sa_reg_gap destroys the bases / read length. Copy them back in.
|
||||||
copy_bases_into_sequence(sequence,bases,read_length);
|
copy_bases_into_sequence(sequence,bases,read_length);
|
||||||
|
|
||||||
|
|
@ -69,7 +75,8 @@ Alignment BWA::generate_single_alignment(const char* bases, const unsigned read_
|
||||||
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
|
||||||
|
|
||||||
// Generate the best alignment from the sequence.
|
// Generate the best alignment from the sequence.
|
||||||
Alignment alignment = generate_final_alignment_from_sequence(sequence);
|
Alignment* alignment = new Alignment;
|
||||||
|
*alignment = generate_final_alignment_from_sequence(sequence);
|
||||||
|
|
||||||
bwa_free_read_seq(1,sequence);
|
bwa_free_read_seq(1,sequence);
|
||||||
|
|
||||||
|
|
@ -148,6 +155,7 @@ Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
|
||||||
Alignment alignment;
|
Alignment alignment;
|
||||||
|
|
||||||
// Populate basic path info
|
// Populate basic path info
|
||||||
|
alignment.edit_distance = sequence->nm;
|
||||||
alignment.num_mismatches = sequence->n_mm;
|
alignment.num_mismatches = sequence->n_mm;
|
||||||
alignment.num_gap_opens = sequence->n_gapo;
|
alignment.num_gap_opens = sequence->n_gapo;
|
||||||
alignment.num_gap_extensions = sequence->n_gape;
|
alignment.num_gap_extensions = sequence->n_gape;
|
||||||
|
|
@ -168,7 +176,9 @@ Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
|
||||||
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
|
||||||
}
|
}
|
||||||
alignment.n_cigar = sequence->n_cigar;
|
alignment.n_cigar = sequence->n_cigar;
|
||||||
|
|
||||||
|
// MD tag with a better breakdown of differences in the cigar
|
||||||
|
alignment.md = strdup(sequence->md);
|
||||||
delete[] sequence->md;
|
delete[] sequence->md;
|
||||||
sequence->md = NULL;
|
sequence->md = NULL;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,8 +21,12 @@ class Alignment {
|
||||||
uint8_t num_mismatches;
|
uint8_t num_mismatches;
|
||||||
uint8_t num_gap_opens;
|
uint8_t num_gap_opens;
|
||||||
uint8_t num_gap_extensions;
|
uint8_t num_gap_extensions;
|
||||||
|
uint16_t edit_distance;
|
||||||
|
|
||||||
uint32_t num_best;
|
uint32_t num_best;
|
||||||
uint32_t num_second_best;
|
uint32_t num_second_best;
|
||||||
|
|
||||||
|
char* md;
|
||||||
};
|
};
|
||||||
|
|
||||||
class BWA {
|
class BWA {
|
||||||
|
|
@ -57,8 +61,8 @@ class BWA {
|
||||||
void set_gap_extension_penalty(int penalty);
|
void set_gap_extension_penalty(int penalty);
|
||||||
|
|
||||||
// Perform the alignment
|
// Perform the alignment
|
||||||
Alignment generate_single_alignment(const char* bases,
|
Alignment* generate_single_alignment(const char* bases,
|
||||||
const unsigned read_length);
|
const unsigned read_length);
|
||||||
void find_paths(const char* bases,
|
void find_paths(const char* bases,
|
||||||
const unsigned read_length,
|
const unsigned read_length,
|
||||||
bwt_aln1_t*& paths,
|
bwt_aln1_t*& paths,
|
||||||
|
|
|
||||||
|
|
@ -252,8 +252,9 @@ JNIEXPORT jobject JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAlig
|
||||||
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
|
||||||
if(read_bases == NULL) return NULL;
|
if(read_bases == NULL) return NULL;
|
||||||
|
|
||||||
Alignment best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
|
Alignment* best_alignment = bwa->generate_single_alignment((const char*)read_bases,read_length);
|
||||||
jobject java_best_alignment = convert_to_java_alignment(env,read_bases,read_length,best_alignment);
|
jobject java_best_alignment = (best_alignment != NULL) ? convert_to_java_alignment(env,read_bases,read_length,*best_alignment) : NULL;
|
||||||
|
delete best_alignment;
|
||||||
|
|
||||||
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
|
||||||
|
|
||||||
|
|
@ -291,12 +292,17 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c
|
||||||
if(env->ExceptionCheck()) return NULL;
|
if(env->ExceptionCheck()) return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
delete[] alignment.cigar;
|
||||||
|
|
||||||
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
|
||||||
if(java_alignment_class == NULL) return NULL;
|
if(java_alignment_class == NULL) return NULL;
|
||||||
|
|
||||||
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IIIIII)V");
|
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IILjava/lang/String;IIIII)V");
|
||||||
if(java_alignment_constructor == NULL) return NULL;
|
if(java_alignment_constructor == NULL) return NULL;
|
||||||
|
|
||||||
|
jstring java_md = env->NewStringUTF(alignment.md);
|
||||||
|
if(java_md == NULL) return NULL;
|
||||||
|
delete[] alignment.md;
|
||||||
|
|
||||||
jobject java_alignment = env->NewObject(java_alignment_class,
|
jobject java_alignment = env->NewObject(java_alignment_class,
|
||||||
java_alignment_constructor,
|
java_alignment_constructor,
|
||||||
|
|
@ -306,6 +312,8 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c
|
||||||
alignment.mapping_quality,
|
alignment.mapping_quality,
|
||||||
java_cigar_operators,
|
java_cigar_operators,
|
||||||
java_cigar_lengths,
|
java_cigar_lengths,
|
||||||
|
alignment.edit_distance,
|
||||||
|
java_md,
|
||||||
alignment.num_mismatches,
|
alignment.num_mismatches,
|
||||||
alignment.num_gap_opens,
|
alignment.num_gap_opens,
|
||||||
alignment.num_gap_extensions,
|
alignment.num_gap_extensions,
|
||||||
|
|
@ -313,8 +321,6 @@ static jobject convert_to_java_alignment(JNIEnv *env, const jbyte* read_bases, c
|
||||||
alignment.num_second_best);
|
alignment.num_second_best);
|
||||||
if(java_alignment == NULL) return NULL;
|
if(java_alignment == NULL) return NULL;
|
||||||
|
|
||||||
delete[] alignment.cigar;
|
|
||||||
|
|
||||||
env->DeleteLocalRef(java_alignment_class);
|
env->DeleteLocalRef(java_alignment_class);
|
||||||
if(env->ExceptionCheck()) return NULL;
|
if(env->ExceptionCheck()) return NULL;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ public class Alignment {
|
||||||
protected char[] cigarOperators;
|
protected char[] cigarOperators;
|
||||||
protected int[] cigarLengths;
|
protected int[] cigarLengths;
|
||||||
|
|
||||||
|
protected int editDistance;
|
||||||
|
protected String mismatchingPositions;
|
||||||
|
|
||||||
protected int numMismatches;
|
protected int numMismatches;
|
||||||
protected int numGapOpens;
|
protected int numGapOpens;
|
||||||
protected int numGapExtensions;
|
protected int numGapExtensions;
|
||||||
|
|
@ -49,6 +52,19 @@ public class Alignment {
|
||||||
*/
|
*/
|
||||||
public int getMappingQuality() { return mappingQuality; }
|
public int getMappingQuality() { return mappingQuality; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the edit distance; will eventually end up in the NM SAM tag
|
||||||
|
* if this alignment makes it that far.
|
||||||
|
* @return The edit distance.
|
||||||
|
*/
|
||||||
|
public int getEditDistance() { return editDistance; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A string representation of which positions mismatch; contents of MD tag.
|
||||||
|
* @return String representation of mismatching positions.
|
||||||
|
*/
|
||||||
|
public String getMismatchingPositions() { return mismatchingPositions; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of mismatches in the read.
|
* Gets the number of mismatches in the read.
|
||||||
* @return Number of mismatches.
|
* @return Number of mismatches.
|
||||||
|
|
@ -129,6 +145,8 @@ public class Alignment {
|
||||||
int mappingQuality,
|
int mappingQuality,
|
||||||
char[] cigarOperators,
|
char[] cigarOperators,
|
||||||
int[] cigarLengths,
|
int[] cigarLengths,
|
||||||
|
int editDistance,
|
||||||
|
String mismatchingPositions,
|
||||||
int numMismatches,
|
int numMismatches,
|
||||||
int numGapOpens,
|
int numGapOpens,
|
||||||
int numGapExtensions,
|
int numGapExtensions,
|
||||||
|
|
@ -140,6 +158,8 @@ public class Alignment {
|
||||||
this.mappingQuality = mappingQuality;
|
this.mappingQuality = mappingQuality;
|
||||||
this.cigarOperators = cigarOperators;
|
this.cigarOperators = cigarOperators;
|
||||||
this.cigarLengths = cigarLengths;
|
this.cigarLengths = cigarLengths;
|
||||||
|
this.editDistance = editDistance;
|
||||||
|
this.mismatchingPositions = mismatchingPositions;
|
||||||
this.numMismatches = numMismatches;
|
this.numMismatches = numMismatches;
|
||||||
this.numGapOpens = numGapOpens;
|
this.numGapOpens = numGapOpens;
|
||||||
this.numGapExtensions = numGapExtensions;
|
this.numGapExtensions = numGapExtensions;
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,8 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
|
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.*;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import net.sf.samtools.SAMFileWriter;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Align reads to the reference specified by BWTPrefix.
|
* Align reads to the reference specified by BWTPrefix.
|
||||||
|
|
@ -37,6 +36,14 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||||
*/
|
*/
|
||||||
private SAMFileWriter outputBam = null;
|
private SAMFileWriter outputBam = null;
|
||||||
|
|
||||||
|
/** Must return true for reads that need to be processed. Reads, for which this method return false will
|
||||||
|
* be skipped by the engine and never passed to the walker.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean filter(char[] ref, SAMRecord read) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
|
||||||
*/
|
*/
|
||||||
|
|
@ -45,8 +52,17 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||||
BWACConfiguration configuration = new BWACConfiguration(prefix);
|
BWACConfiguration configuration = new BWACConfiguration(prefix);
|
||||||
aligner = new BWACAligner(configuration);
|
aligner = new BWACAligner(configuration);
|
||||||
|
|
||||||
|
// HACK: If the sequence dictionary in the existing header is null, stuff the contents of the current reference
|
||||||
|
// into it, so that the sequence has something to which to back-align.
|
||||||
|
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
||||||
|
if(header.getSequenceDictionary().isEmpty()) {
|
||||||
|
SAMSequenceDictionary referenceDictionary =
|
||||||
|
ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary();
|
||||||
|
header.setSequenceDictionary(referenceDictionary);
|
||||||
|
}
|
||||||
|
|
||||||
if ( outputBamFile != null ) {
|
if ( outputBamFile != null ) {
|
||||||
SAMFileHeader header = this.getToolkit().getSAMFileHeader();
|
// Stuff the header from the fasta into that of the sequence dictionary.
|
||||||
outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression);
|
outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -145,21 +145,26 @@ public class BWACAligner {
|
||||||
/**
|
/**
|
||||||
* Creates a read directly from an alignment.
|
* Creates a read directly from an alignment.
|
||||||
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
|
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
|
||||||
* @param alignment The target alignment for this read.
|
* @param alignment The target alignment for this read. If alignment is null, assume the read is unmappe.d
|
||||||
* @return A mapped alignment.
|
* @return A mapped alignment.
|
||||||
*/
|
*/
|
||||||
public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) {
|
public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) {
|
||||||
SAMRecord read = null;
|
SAMRecord read = null;
|
||||||
try {
|
try {
|
||||||
read = (SAMRecord)unmappedRead.clone();
|
read = (SAMRecord)unmappedRead.clone();
|
||||||
read.setReadUmappedFlag(false);
|
if(alignment != null) {
|
||||||
read.setAlignmentStart((int)alignment.getAlignmentStart());
|
read.setReadUmappedFlag(false);
|
||||||
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
|
read.setReferenceIndex(alignment.getContigIndex());
|
||||||
read.setMappingQuality(alignment.getMappingQuality());
|
read.setAlignmentStart((int)alignment.getAlignmentStart());
|
||||||
read.setCigar(alignment.getCigar());
|
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
|
||||||
if(alignment.isNegativeStrand()) {
|
read.setMappingQuality(alignment.getMappingQuality());
|
||||||
read.setReadBases(BaseUtils.reverse(read.getReadBases()));
|
read.setCigar(alignment.getCigar());
|
||||||
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
|
if(alignment.isNegativeStrand()) {
|
||||||
|
read.setReadBases(BaseUtils.reverse(read.getReadBases()));
|
||||||
|
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
|
||||||
|
}
|
||||||
|
read.setAttribute("NM",alignment.getEditDistance());
|
||||||
|
read.setAttribute("MD",alignment.getMismatchingPositions());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch(CloneNotSupportedException ex) {
|
catch(CloneNotSupportedException ex) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue