Checkpoint. Add first phase of single alignment interface.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2094 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-19 19:03:43 +00:00
parent 306f4624c6
commit a972b2769f
9 changed files with 516 additions and 147 deletions

View File

@ -1,5 +1,5 @@
#!/bin/sh
export BWA_HOME="/humgen/gsa-hphome1/hanna/src/bwa-0.5.3"
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa"
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
export TARGET_LIB="libbwa.so"
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"

View File

@ -36,22 +36,114 @@ BWA::~BWA() {
bwt_destroy(bwts[1]);
}
void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments)
void BWA::find_paths(const char* bases, const unsigned read_length, bwt_aln1_t*& paths, unsigned& num_paths, unsigned& best_path_count, unsigned& second_best_path_count)
{
bwa_seq_t* sequence = create_sequence();
copy_bases_into_sequence(sequence, bases, read_length);
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
// Calculate the suffix array interval for each sequence, storing the result in sequence->aln (and sequence->n_aln).
// This method will destroy the contents of seq and rseq.
bwa_cal_sa_reg_gap(0,bwts,1,sequence,&options);
// Translate suffix array indices into exactly how many alignments have been found.
paths = new bwt_aln1_t[sequence->n_aln];
memcpy(paths,sequence->aln,sequence->n_aln*sizeof(bwt_aln1_t));
num_paths = sequence->n_aln;
// Call aln2seq to initialize the type of match present.
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
best_path_count = sequence->c1;
second_best_path_count = sequence->c2;
bwa_free_read_seq(1,sequence);
}
void BWA::generate_alignments_from_paths(const char* bases,
const unsigned read_length,
bwt_aln1_t* paths,
const unsigned num_paths,
const unsigned best_count,
const unsigned second_best_count,
Alignment*& alignments,
unsigned& num_alignments)
{
bwa_seq_t* sequence = create_sequence();
copy_bases_into_sequence(sequence, bases, read_length);
sequence->aln = paths;
sequence->n_aln = num_paths;
// (Ab)use bwa_aln2seq to propagate values stored in the path out into the sequence itself.
bwa_aln2seq(sequence->n_aln,sequence->aln,sequence);
// Calculate and refine the position for each alignment. This position may be inaccurate
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
copy_bases_into_sequence(sequence, bases, read_length);
create_alignments(sequence, alignments, num_alignments);
// But overwrite key parts of the sequence in case the user passed back only a smaller subset
// of the paths.
sequence->c1 = best_count;
sequence->c2 = second_best_count;
sequence->type = sequence->c1 > 1 ? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
num_alignments = 0;
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
alignments = new Alignment[num_alignments];
unsigned alignment_idx = 0;
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
// Stub in a 'working' path, so that only the desired alignment is local-aligned.
const bwt_aln1_t* path = paths + path_idx;
bwt_aln1_t working_path = *path;
// Loop through all alignments, aligning each one individually.
for(unsigned sa_idx = path->k; sa_idx <= path->l; sa_idx++) {
working_path.k = working_path.l = sa_idx;
sequence->aln = &working_path;
sequence->n_aln = 1;
sequence->sa = sa_idx;
sequence->strand = path->a;
sequence->score = path->score;
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
if(alignment_idx > 0)
seq_reverse(sequence->len, sequence->seq, 0);
// Calculate the local coordinate and local alignment.
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
// Copy the local alignment data into the alignment object.
Alignment& alignment = *(alignments + alignment_idx);
// Populate basic path info
alignment.num_mismatches = sequence->n_mm;
alignment.num_gap_opens = sequence->n_gapo;
alignment.num_gap_extensions = sequence->n_gape;
alignment.num_best = sequence->c1;
alignment.num_second_best = sequence->c2;
alignment.type = sequence->type;
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
alignment.negative_strand = sequence->strand;
alignment.mapping_quality = sequence->mapQ;
alignment.cigar = NULL;
if(sequence->cigar) {
alignment.cigar = new uint16_t[sequence->n_cigar];
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
}
alignment.n_cigar = sequence->n_cigar;
delete[] sequence->md;
sequence->md = NULL;
alignment_idx++;
}
}
sequence->aln = NULL;
sequence->n_aln = 0;
bwa_free_read_seq(1,sequence);
}
@ -132,63 +224,3 @@ void BWA::copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const
sequence->full_len = sequence->len = read_length;
}
void BWA::create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigned& num_alignments) {
num_alignments = 0;
for(unsigned i = 0; i < (unsigned)sequence->n_aln; i++)
num_alignments += (sequence->aln + i)->l - (sequence->aln + i)->k + 1;
alignments = new Alignment[num_alignments];
unsigned alignment_idx = 0;
// backup existing alignment blocks.
bwt_aln1_t* alignment_blocks = sequence->aln;
int num_alignment_blocks = sequence->n_aln;
for(unsigned alignment_block_idx = 0; alignment_block_idx < (unsigned)num_alignment_blocks; alignment_block_idx++) {
// Stub in a 'working' alignment block, so that only the desired alignment is local-aligned.
const bwt_aln1_t* alignment_block = alignment_blocks + alignment_block_idx;
bwt_aln1_t working_alignment_block = *alignment_block;
// Loop through all alignments, aligning each one individually.
for(unsigned sa_idx = alignment_block->k; sa_idx <= alignment_block->l; sa_idx++) {
working_alignment_block.k = working_alignment_block.l = sa_idx;
sequence->aln = &working_alignment_block;
sequence->n_aln = 1;
sequence->sa = sa_idx;
sequence->strand = alignment_block->a;
sequence->score = alignment_block->score;
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
if(alignment_idx > 0)
seq_reverse(sequence->len, sequence->seq, 0);
// Calculate the local coordinate and local alignment.
bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
bwa_refine_gapped(bns, 1, sequence, reference, NULL);
// Copy the local alignment data into the alignment object.
Alignment& alignment = *(alignments + alignment_idx);
alignment.type = sequence->type;
bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
alignment.negative_strand = sequence->strand;
alignment.mapQ = sequence->mapQ;
alignment.cigar = NULL;
if(sequence->cigar) {
alignment.cigar = new uint16_t[sequence->n_cigar];
memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
}
alignment.n_cigar = sequence->n_cigar;
alignment_idx++;
}
}
// Restore original alignment blocks.
sequence->aln = alignment_blocks;
sequence->n_aln = num_alignment_blocks;
}

View File

@ -19,7 +19,6 @@ class BWA {
void load_default_options();
bwa_seq_t* create_sequence();
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
void create_alignments(bwa_seq_t* sequence, Alignment*& alignments, unsigned& num_alignments);
public:
BWA(const char* ann_filename,
@ -41,7 +40,20 @@ class BWA {
void set_gap_extension_penalty(int penalty);
// Perform the alignment
void align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments);
void find_paths(const char* bases,
const unsigned read_length,
bwt_aln1_t*& paths,
unsigned& num_paths,
unsigned& best_path_count,
unsigned& second_best_path_count);
void generate_alignments_from_paths(const char* bases,
const unsigned read_length,
bwt_aln1_t* paths,
const unsigned num_paths,
const unsigned best_count,
const unsigned second_best_count,
Alignment*& alignments,
unsigned& num_alignments);
};
class Alignment {
@ -50,11 +62,16 @@ class Alignment {
int contig;
bwtint_t pos;
bool negative_strand;
uint32_t mapping_quality;
uint16_t *cigar;
int n_cigar;
uint32_t mapQ;
uint8_t num_mismatches;
uint8_t num_gap_opens;
uint8_t num_gap_extensions;
uint32_t num_best;
uint32_t num_second_best;
};
#endif // BWA_GATEWAY

View File

@ -95,7 +95,63 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
delete bwa;
}
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getAlignments(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases)
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)
{
BWA* bwa = (BWA*)java_bwa;
const jsize read_length = env->GetArrayLength(java_bases);
if(env->ExceptionCheck()) return NULL;
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
if(read_bases == NULL) return NULL;
bwt_aln1_t* paths = NULL;
unsigned num_paths = 0;
unsigned best_path_count, second_best_path_count;
bwa->find_paths((const char*)read_bases,read_length,paths,num_paths,best_path_count,second_best_path_count);
jobjectArray java_paths = env->NewObjectArray(num_paths, env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath"), NULL);
if(java_paths == NULL) return NULL;
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
bwt_aln1_t& path = *(paths + path_idx);
jclass java_path_class = env->FindClass("org/broadinstitute/sting/alignment/bwa/c/BWAPath");
if(java_path_class == NULL) return NULL;
jmethodID java_path_constructor = env->GetMethodID(java_path_class, "<init>", "(IIIZJJIII)V");
if(java_path_constructor == NULL) return NULL;
// Note that k/l are being cast to long. Bad things will happen if JNI assumes that they're ints.
jobject java_path = env->NewObject(java_path_class,
java_path_constructor,
path.n_mm,
path.n_gapo,
path.n_gape,
path.a,
(jlong)path.k,
(jlong)path.l,
path.score,
best_path_count,
second_best_path_count);
if(java_path == NULL) return NULL;
env->SetObjectArrayElement(java_paths,path_idx,java_path);
if(env->ExceptionCheck()) return NULL;
env->DeleteLocalRef(java_path_class);
if(env->ExceptionCheck()) return NULL;
}
delete[] paths;
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);
return env->ExceptionCheck() ? NULL : java_paths;
}
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases, jobjectArray java_paths)
{
BWA* bwa = (BWA*)java_bwa;
@ -105,10 +161,67 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
jbyte *read_bases = env->GetByteArrayElements(java_bases,JNI_FALSE);
if(read_bases == NULL) return NULL;
const jsize num_paths = env->GetArrayLength(java_paths);
bwt_aln1_t* paths = new bwt_aln1_t[num_paths];
unsigned best_count = 0, second_best_count = 0;
for(unsigned path_idx = 0; path_idx < (unsigned)num_paths; path_idx++) {
jobject java_path = env->GetObjectArrayElement(java_paths,path_idx);
jclass java_path_class = env->GetObjectClass(java_path);
if(java_path_class == NULL) return NULL;
bwt_aln1_t& path = *(paths + path_idx);
jfieldID mismatches_field = env->GetFieldID(java_path_class, "numMismatches", "I");
if(mismatches_field == NULL) return NULL;
path.n_mm = env->GetIntField(java_path,mismatches_field);
if(env->ExceptionCheck()) return NULL;
jfieldID gap_opens_field = env->GetFieldID(java_path_class, "numGapOpens", "I");
if(gap_opens_field == NULL) return NULL;
path.n_gapo = env->GetIntField(java_path,gap_opens_field);
if(env->ExceptionCheck()) return NULL;
jfieldID gap_extensions_field = env->GetFieldID(java_path_class, "numGapExtensions", "I");
if(gap_extensions_field == NULL) return NULL;
path.n_gape = env->GetIntField(java_path,gap_extensions_field);
if(env->ExceptionCheck()) return NULL;
jfieldID negative_strand_field = env->GetFieldID(java_path_class, "negativeStrand", "Z");
if(negative_strand_field == NULL) return NULL;
path.a = env->GetBooleanField(java_path,negative_strand_field);
if(env->ExceptionCheck()) return NULL;
jfieldID k_field = env->GetFieldID(java_path_class, "k", "J");
if(k_field == NULL) return NULL;
path.k = env->GetLongField(java_path,k_field);
if(env->ExceptionCheck()) return NULL;
jfieldID l_field = env->GetFieldID(java_path_class, "l", "J");
if(l_field == NULL) return NULL;
path.l = env->GetLongField(java_path,l_field);
if(env->ExceptionCheck()) return NULL;
jfieldID score_field = env->GetFieldID(java_path_class, "score", "I");
if(score_field == NULL) return NULL;
path.score = env->GetIntField(java_path,score_field);
if(env->ExceptionCheck()) return NULL;
jfieldID best_count_field = env->GetFieldID(java_path_class, "bestCount", "I");
if(best_count_field == NULL) return NULL;
best_count = env->GetIntField(java_path,best_count_field);
if(env->ExceptionCheck()) return NULL;
jfieldID second_best_count_field = env->GetFieldID(java_path_class, "secondBestCount", "I");
if(second_best_count_field == NULL) return NULL;
second_best_count = env->GetIntField(java_path,second_best_count_field);
if(env->ExceptionCheck()) return NULL;
}
Alignment* alignments = NULL;
unsigned num_alignments = 0;
bwa->align((const char*)read_bases,read_length,alignments,num_alignments);
bwa->generate_alignments_from_paths((const char*)read_bases,read_length,paths,num_paths,best_count,second_best_count,alignments,num_alignments);
num_alignments = 0;
jobjectArray java_alignments = env->NewObjectArray(num_alignments, env->FindClass("org/broadinstitute/sting/alignment/Alignment"), NULL);
if(java_alignments == NULL) return NULL;
@ -128,39 +241,44 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
if(alignment.cigar) {
for(unsigned cigar_idx = 0; cigar_idx < (unsigned)alignment.n_cigar; ++cigar_idx) {
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
if(env->ExceptionCheck()) return NULL;
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
if(env->ExceptionCheck()) return NULL;
jchar cigar_operator = "MIDS"[alignment.cigar[cigar_idx]>>14];
jint cigar_length = alignment.cigar[cigar_idx]&0x3fff;
env->SetCharArrayRegion(java_cigar_operators,cigar_idx,1,&cigar_operator);
if(env->ExceptionCheck()) return NULL;
env->SetIntArrayRegion(java_cigar_lengths,cigar_idx,1,&cigar_length);
if(env->ExceptionCheck()) return NULL;
}
}
else {
if(alignment.type != BWA_TYPE_NO_MATCH) {
jchar cigar_operator = 'M';
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
if(env->ExceptionCheck()) return NULL;
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
if(env->ExceptionCheck()) return NULL;
jchar cigar_operator = 'M';
env->SetCharArrayRegion(java_cigar_operators,0,1,&cigar_operator);
if(env->ExceptionCheck()) return NULL;
env->SetIntArrayRegion(java_cigar_lengths,0,1,&read_length);
if(env->ExceptionCheck()) return NULL;
}
}
jclass java_alignment_class = env->FindClass("org/broadinstitute/sting/alignment/Alignment");
if(java_alignment_class == NULL) return NULL;
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[I)V");
jmethodID java_alignment_constructor = env->GetMethodID(java_alignment_class, "<init>", "(IIZI[C[IIIIII)V");
if(java_alignment_constructor == NULL) return NULL;
jobject java_alignment = env->NewObject(java_alignment_class,
java_alignment_constructor,
alignment.contig,
alignment.pos,
alignment.negative_strand,
alignment.mapQ,
java_cigar_operators,
java_cigar_lengths);
java_alignment_constructor,
alignment.contig,
alignment.pos,
alignment.negative_strand,
alignment.mapping_quality,
java_cigar_operators,
java_cigar_lengths,
alignment.num_mismatches,
alignment.num_gap_opens,
alignment.num_gap_extensions,
alignment.num_best,
alignment.num_second_best);
if(java_alignment == NULL) return NULL;
delete[] alignment.cigar;
@ -173,6 +291,7 @@ JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWA
}
delete[] alignments;
delete[] paths;
env->ReleaseByteArrayElements(java_bases,read_bases,JNI_FALSE);

View File

@ -25,12 +25,20 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
/*
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
* Method: getAlignments
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment;
* Method: getPaths
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;
*/
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getAlignments
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths
(JNIEnv *, jobject, jlong, jbyteArray);
/*
* Class: org_broadinstitute_sting_alignment_bwa_c_BWACAligner
* Method: convertPathsToAlignments
* Signature: (J[B[Lorg/broadinstitute/sting/alignment/bwa/c/BWAPath;)[Lorg/broadinstitute/sting/alignment/Alignment;
*/
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_convertPathsToAlignments
(JNIEnv *, jobject, jlong, jbyteArray, jobjectArray);
#ifdef __cplusplus
}
#endif

View File

@ -16,8 +16,14 @@ public class Alignment {
protected boolean negativeStrand;
protected int mappingQuality;
private char[] cigarOperators;
private int[] cigarLengths;
protected char[] cigarOperators;
protected int[] cigarLengths;
protected int numMismatches;
protected int numGapOpens;
protected int numGapExtensions;
protected int bestCount;
protected int secondBestCount;
/**
* Gets the index of the given contig.
@ -43,6 +49,36 @@ public class Alignment {
*/
public int getMappingQuality() { return mappingQuality; }
/**
* Gets the number of mismatches in the read.
* @return Number of mismatches.
*/
public int getNumMismatches() { return numMismatches; }
/**
* Get the number of gap opens.
* @return Number of gap opens.
*/
public int getNumGapOpens() { return numGapOpens; }
/**
* Get the number of gap extensions.
* @return Number of gap extensions.
*/
public int getNumGapExtensions() { return numGapExtensions; }
/**
* Get the number of best alignments.
* @return Number of top scoring alignments.
*/
public int getBestCount() { return bestCount; }
/**
* Get the number of second best alignments.
* @return Number of second best scoring alignments.
*/
public int getSecondBestCount() { return secondBestCount; }
/**
* Gets the cigar for this alignment.
* @return sam-jdk formatted alignment.
@ -87,13 +123,27 @@ public class Alignment {
* @param cigarOperators The ordered operators in the cigar string.
* @param cigarLengths The lengths to which each operator applies.
*/
public Alignment(int contigIndex, int alignmentStart, boolean negativeStrand, int mappingQuality,
char[] cigarOperators, int[] cigarLengths) {
public Alignment(int contigIndex,
int alignmentStart,
boolean negativeStrand,
int mappingQuality,
char[] cigarOperators,
int[] cigarLengths,
int numMismatches,
int numGapOpens,
int numGapExtensions,
int bestCount,
int secondBestCount) {
this.contigIndex = contigIndex;
this.alignmentStart = alignmentStart;
this.negativeStrand = negativeStrand;
this.mappingQuality = mappingQuality;
this.cigarOperators = cigarOperators;
this.cigarLengths = cigarLengths;
this.numMismatches = numMismatches;
this.numGapOpens = numGapOpens;
this.numGapExtensions = numGapExtensions;
this.bestCount = bestCount;
this.secondBestCount = secondBestCount;
}
}

View File

@ -6,9 +6,14 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import org.broadinstitute.sting.alignment.bwa.c.BWAPath;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.TreeMap;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
/**
* Validates alignments against existing reads.
@ -23,6 +28,8 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
//SortedMap<Integer,Integer> alignmentCounts = new TreeMap<Integer,Integer>();
/**
* The instance used to generate alignments.
*/
@ -48,7 +55,7 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
@Override
public boolean filter(char[] ref, SAMRecord read) {
return true;
//return read.getReadName().contains("SL-XBC:1:61:1719:1512#0");
//return read.getReadName().contains("SL-XBC:1:13:1296:1410#0");
}
/**
@ -66,6 +73,15 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
boolean matches = true;
Iterator<Alignment[]> alignments = aligner.getAlignments(bases);
/*
BWAPath[] paths = aligner.getPaths(bases);
int bestCount = 0;
if(paths.length > 0) bestCount = paths[0].bestCount;
if(alignmentCounts.containsKey(bestCount))
alignmentCounts.put(bestCount,alignmentCounts.get(bestCount)+1);
else
alignmentCounts.put(bestCount,1);
*/
if(!alignments.hasNext()) {
matches = read.getReadUnmappedFlag();
@ -102,7 +118,9 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
}
if(++count % 10000 == 0) logger.info(String.format("Processed %d reads", count));
if(++count % 10000 == 0) {
logger.info(String.format("Processed %d reads", count));
}
//logger.info(String.format("validated read %s", read.getReadName()));
@ -135,6 +153,18 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
public void onTraversalDone(Integer result) {
aligner.close();
super.onTraversalDone(result);
/*
try {
PrintWriter output = new PrintWriter("GATK_distribution.out");
for(SortedMap.Entry<Integer,Integer> results: alignmentCounts.entrySet())
output.printf("%d\t%d%n", results.getKey(), results.getValue());
output.close();
}
catch(FileNotFoundException ex) {
throw new StingException("Unable to open output file");
}
*/
}
}

View File

@ -4,11 +4,8 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import java.util.Comparator;
import java.util.Arrays;
import java.util.Iterator;
import java.util.*;
/**
* An aligner using the BWA/C implementation.
@ -34,7 +31,7 @@ public class BWACAligner {
protected native long create(BWACConfiguration configuration);
/**
* Destroy the
* Destroy the BWA/C thunk.
* @param thunkPointer Pointer to the allocated thunk.
*/
protected native void destroy(long thunkPointer);
@ -59,8 +56,8 @@ public class BWACAligner {
* @param bases List of bases.
* @return Iterator to alignments.
*/
public Iterator<Alignment[]> getAlignments(byte[] bases) {
final Alignment[] alignments = getAllAlignments(bases);
public Iterator<Alignment[]> getAlignments(final byte[] bases) {
final BWAPath[] paths = getPaths(bases);
return new Iterator<Alignment[]>() {
/**
* The last position accessed.
@ -71,19 +68,19 @@ public class BWACAligner {
* Whether all alignments have been seen based on the current position.
* @return True if any more alignments are pending. False otherwise.
*/
public boolean hasNext() { return position < alignments.length; }
public boolean hasNext() { return position < paths.length; }
/**
* Return the next cross-section of alignments, based on mapping quality.
* @return Array of the next set of alignments of a given mapping quality.
*/
public Alignment[] next() {
if(position >= alignments.length)
if(position >= paths.length)
throw new UnsupportedOperationException("Out of alignments to return.");
int mapQ = alignments[position].getMappingQuality();
int score = paths[position].score;
int startingPosition = position;
while(position < alignments.length && alignments[position].getMappingQuality() == mapQ) position++;
return Arrays.copyOfRange(alignments,startingPosition,position);
while(position < paths.length && paths[position].score == score) position++;
return convertPathsToAlignments(bases,Arrays.copyOfRange(paths,startingPosition,position));
}
/**
@ -93,19 +90,6 @@ public class BWACAligner {
};
}
/**
* Align the given base array to the BWT. The base array should be in ASCII format.
* @param bases ASCII representation of byte array.
* @return an array of indices into the bwa.
*/
public Alignment[] getAllAlignments(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
Alignment[] alignments = getAlignments(thunkPointer,bases);
Arrays.sort(alignments,new AlignmentMapQComparator());
return alignments;
}
/**
* Get a iterator of aligned reads, batched by mapping quality.
* @param read Read to align.
@ -182,28 +166,56 @@ public class BWACAligner {
}
/**
* Caller to the . The base array should be in
* ASCII format.
* @param thunkPointer pointer to the C++ object managing BWA/C.
* Align the given base array to the BWT. The base array should be in ASCII format.
* @param bases ASCII representation of byte array.
* @return an array of indices into the bwa.
*/
protected native Alignment[] getAlignments(long thunkPointer, byte[] bases);
public Alignment[] getAllAlignments(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C align attempted, but BWA/C is not properly initialized.");
BWAPath[] paths = getPaths(bases);
return convertPathsToAlignments(thunkPointer,bases,paths);
}
/**
* Compares two alignments based on their mapping quality. The one with the highest mapping quality comes FIRST.
* Get the paths associated with the given base string.
* @param bases List of bases.
* @return A set of paths through the BWA.
*/
private class AlignmentMapQComparator implements Comparator<Alignment> {
/**
* Compares two alignments based on their score. If lhs has a higher mapping quality than
* rhs, the score will be negative.
* @param lhs Left-hand side for comparison.
* @param rhs Right-hand side for comparison.
* @return Negative value if lhs' score is highe
*/
public int compare(Alignment lhs, Alignment rhs) {
// NOTE: this strategy works because Integer.MAX_VALUE, Integer.MIN_VALUE >> than valid range of mapping
// qualities.
return rhs.getMappingQuality() - lhs.getMappingQuality();
}
public BWAPath[] getPaths(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C getPaths attempted, but BWA/C is not properly initialized.");
BWAPath[] paths = getPaths(thunkPointer,bases);
return paths;
}
/**
* Do the extra steps involved in converting a local alignment to a global alignment.
* @param bases ASCII representation of byte array.
* @param paths Paths through the current BWT.
* @return A list of alignments.
*/
protected Alignment[] convertPathsToAlignments(byte[] bases, BWAPath[] paths) {
if(thunkPointer == 0)
throw new StingException("BWA/C convertPathsToAlignments attempted, but BWA/C is not properly initialized.");
return convertPathsToAlignments(thunkPointer,bases,paths);
}
/**
* Caller to the path generation functionality within BWA/C. Call this method's getPaths() wrapper (above) instead.
* @param thunkPointer pointer to the C++ object managing BWA/C.
* @param bases ASCII representation of byte array.
* @return A list of paths through the specified BWT.
*/
protected native BWAPath[] getPaths(long thunkPointer, byte[] bases);
/**
* Do the extra steps involved in converting a local alignment to a global alignment.
* Call this method's convertPathsToAlignments() wrapper (above) instead.
* @param thunkPointer pointer to the C++ object managing BWA/C.
* @param bases ASCII representation of byte array.
* @param paths Paths through the current BWT.
* @return A list of alignments.
*/
protected native Alignment[] convertPathsToAlignments(long thunkPointer, byte[] bases, BWAPath[] paths);
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.alignment.bwa.c;
/**
* Models a BWA path.
*
* @author mhanna
* @version 0.1
*/
public class BWAPath {
/**
* Number of mismatches encountered along this path.
*/
public final int numMismatches;
/**
* Number of gap opens encountered along this path.
*/
public final int numGapOpens;
/**
* Number of gap extensions along this path.
*/
public final int numGapExtensions;
/**
* Whether this alignment was found on the positive or negative strand.
*/
public final boolean negativeStrand;
/**
* Starting coordinate in the BWT.
*/
public final long k;
/**
* Ending coordinate in the BWT.
*/
public final long l;
/**
* The score of this path.
*/
public final int score;
/**
* The number of best alignments seen along this path.
*/
public final int bestCount;
/**
* The number of second best alignments seen along this path.
*/
public final int secondBestCount;
/**
* Create a new path with the given attributes.
* @param numMismatches Number of mismatches along path.
* @param numGapOpens Number of gap opens along path.
* @param numGapExtensions Number of gap extensions along path.
* @param k Index to first coordinate within BWT.
* @param l Index to last coordinate within BWT.
* @param score Score of this alignment. Not the mapping quality.
*/
public BWAPath(int numMismatches, int numGapOpens, int numGapExtensions, boolean negativeStrand, long k, long l, int score, int bestCount, int secondBestCount) {
this.numMismatches = numMismatches;
this.numGapOpens = numGapOpens;
this.numGapExtensions = numGapExtensions;
this.negativeStrand = negativeStrand;
this.k = k;
this.l = l;
this.score = score;
this.bestCount = bestCount;
this.secondBestCount = secondBestCount;
}
}