Fix bug where alignments with indels would be busted because bwa reverses

the read bases to undo a previous read base reverse that doesn't occur in the
libbwa codepath.
Also fixed some memory management issues.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1938 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-10-29 21:33:13 +00:00
parent ef00ba3ced
commit 1f0d852a48
4 changed files with 42 additions and 21 deletions

View File

@ -28,7 +28,7 @@ BWA::~BWA() {
void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments)
{
bwa_seq_t* sequence = create_sequence();
copy_bases_into_sequence(sequence, bases, read_length, true);
copy_bases_into_sequence(sequence, bases, read_length);
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
// This method will destroy the contents of seq and rseq.
@ -39,7 +39,7 @@ void BWA::align(const char* bases, const unsigned read_length, Alignment*& align
// Calculate and refine the position for each alignment. This position may be inaccurate
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
copy_bases_into_sequence(sequence, bases, read_length, false);
copy_bases_into_sequence(sequence, bases, read_length);
create_alignments(sequence, alignments, num_alignments);
bwa_free_read_seq(1,sequence);
@ -89,17 +89,18 @@ bwa_seq_t* BWA::create_sequence()
return sequence;
}
void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, unsigned read_length, bool reverse)
void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length)
{
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
sequence->seq = new ubyte_t[read_length];
sequence->rseq = new ubyte_t[read_length];
for(unsigned i = 0; i < read_length; i++) sequence->seq[i] = nst_nt4_table[(unsigned)bases[i]];
memcpy(sequence->rseq,sequence->seq,read_length);
if(reverse) {
seq_reverse(read_length,sequence->seq,0);
seq_reverse(read_length,sequence->rseq,1);
}
// BWA expects the read bases to arrive reversed.
seq_reverse(read_length,sequence->seq,0);
seq_reverse(read_length,sequence->rseq,1);
sequence->full_len = sequence->len = read_length;
}
@ -136,10 +137,15 @@ void BWA::create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigne
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
alignment.negative_strand = sequence->strand;
alignment.cigar = sequence->cigar;
alignment.n_cigar = sequence->n_cigar;
alignment.mapQ = sequence->mapQ;
alignment.cigar = NULL;
if(sequence->cigar) {
alignment.cigar = new uint16_t[sequence->n_cigar];
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
}
alignment.n_cigar = sequence->n_cigar;
alignment_idx++;
}
}

View File

@ -17,7 +17,7 @@ class BWA {
void load_default_options();
bwa_seq_t* create_sequence();
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length, const bool reverse);
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
void create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigned& num_alignments);
public:

View File

@ -110,6 +110,8 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACA
java_cigar_operators,
java_cigar_lengths);
env->SetObjectArrayElement(java_alignments,alignment_idx,java_alignment);
delete[] alignment.cigar;
}
delete[] alignments;

View File

@ -3,9 +3,11 @@ package org.broadinstitute.sting.alignment.bwa;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.alignment.Alignment;
import java.io.File;
import java.util.Scanner;
/**
* An aligner using the BWA/C implementation.
@ -109,20 +111,31 @@ public class BWACAligner {
int count = 0;
try{
System.out.println("Press any key");
System.in.read();
}
catch(Exception ex) {
}
for(SAMRecord read: reader) {
count++;
//if(count > 1) break;
Alignment[] alignments = thunk.align(read.getReadBases());
/*
System.out.printf("Read: %s: ", read.getReadName());
for(Alignment alignment: alignments)
System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;",
alignment.getContigIndex(),
alignment.getAlignmentStart(),
alignment.isNegativeStrand(),
alignment.getMappingQuality(),
alignment.getCigarString());
*/
//if(!read.getReadName().equals("SL-XBC:1:83:664:1077#0"))
// continue;
byte[] bases = read.getReadBases();
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
Alignment[] alignments = thunk.align(bases);
//System.out.printf("Read: %s: ", read.getReadName());
//for(Alignment alignment: alignments)
// System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;",
// alignment.getContigIndex(),
// alignment.getAlignmentStart(),
// alignment.isNegativeStrand(),
// alignment.getMappingQuality(),
// alignment.getCigarString());
if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count);
//System.out.printf("%n");
}