diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp index fa9f2c14d..9a74410d7 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.cpp @@ -60,7 +60,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d delete bwa; } -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) { +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) { BWA* bwa = (BWA*)java_bwa; const jsize read_length = env->GetArrayLength(java_bases); diff --git a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h index cf3bb7ebf..23037cf12 100644 --- a/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h +++ b/c/bwa/org_broadinstitute_sting_alignment_bwa_BWACAligner.h @@ -25,10 +25,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d /* * Class: org_broadinstitute_sting_alignment_bwa_BWACAligner - * Method: align + * Method: getAlignments * Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment; */ -JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align +JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments (JNIEnv *, jobject, jlong, jbyteArray); #ifdef __cplusplus diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentValidationWalker.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentValidationWalker.java new file mode 100644 index 000000000..b3b629e85 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentValidationWalker.java @@ -0,0 +1,72 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.alignment.Alignment; +import net.sf.samtools.SAMRecord; + +/** + * Validates alignments against existing reads. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentValidationWalker extends ReadWalker { + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; + + private BWACAligner aligner = null; + + + @Override + public void initialize() { + aligner = new BWACAligner(prefix + ".ann", + prefix + ".amb", + prefix + ".pac", + prefix + ".bwt", + prefix + ".sa", + prefix + ".rbwt", + prefix + ".rsa"); + } + + public Integer reduceInit() { return 0; } + + @Override + public Integer map(char[] ref, SAMRecord read) { + byte[] bases = read.getReadBases(); + if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); + + boolean matches = true; + Alignment[] alignments = aligner.getAlignments(bases); + + if(alignments.length == 0 && !read.getReadUnmappedFlag()) + matches = false; + else { + for(Alignment alignment: alignments) { + matches = (alignment.getAlignmentStart() == read.getAlignmentStart()); + matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); + matches &= (alignment.getCigar().equals(read.getCigar())); + matches &= (alignment.getMappingQuality() == read.getMappingQuality()); + if(matches) break; + } + } + + if(!matches) + throw new StingException(String.format("Read %s mismatches!", read.getReadName())); + + return 1; + } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentWalker.java new file mode 100644 index 000000000..8be6fad88 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignmentWalker.java @@ -0,0 +1,63 @@ +package org.broadinstitute.sting.alignment.bwa; + +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.alignment.Alignment; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import net.sf.samtools.SAMRecord; + +import java.util.Random; + +/** + * Javadoc goes here. + * + * @author mhanna + * @version 0.1 + */ +public class AlignmentWalker extends ReadWalker { + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; + + /** + * The actual aligner. + */ + private BWACAligner aligner = null; + + /** + * A random number generator, used to generate alignments. + */ + private Random random = new Random(); + + @Override + public void initialize() { + aligner = new BWACAligner(prefix + ".ann", + prefix + ".amb", + prefix + ".pac", + prefix + ".bwt", + prefix + ".sa", + prefix + ".rbwt", + prefix + ".rsa"); + } + + public Integer reduceInit() { return 0; } + + @Override + public Integer map(char[] ref, SAMRecord read) { + SAMRecord[] alignedReads = aligner.align(read); + SAMRecord selectedRead = alignedReads[random.nextInt(alignedReads.length)]; + out.println(selectedRead.format()); + return 1; + } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + super.onTraversalDone(result); + } + +} diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java index 8f1b83f42..1af820811 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWACAligner.java @@ -82,10 +82,40 @@ public class BWACAligner { * @param bases ASCII representation of byte array. * @return an array of indices into the bwa. */ - public Alignment[] align(byte[] bases) { + public Alignment[] getAlignments(byte[] bases) { if(thunkPointer == 0) throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized."); - return align(thunkPointer,bases); + return getAlignments(thunkPointer,bases); + } + + /** + * Push all alignment data into individual SAMRecords, gaining in convenience but losing some of + * the additional data stored in an alignment object. + * @param read The read to align. + * @return A list of potential alignments. + */ + public SAMRecord[] align(SAMRecord read) { + Alignment[] alignments = getAlignments(read.getReadBases()); + SAMRecord[] reads = new SAMRecord[alignments.length]; + for(int i = 0; i < alignments.length; i++) { + try { + reads[i] = (SAMRecord)read.clone(); + reads[i].setReadUmappedFlag(false); + reads[i].setAlignmentStart((int)alignments[i].getAlignmentStart()); + reads[i].setReadNegativeStrandFlag(alignments[i].isNegativeStrand()); + reads[i].setMappingQuality(alignments[i].getMappingQuality()); + reads[i].setCigar(alignments[i].getCigar()); + if(alignments[i].isNegativeStrand()) { + reads[i].setReadBases(BaseUtils.reverse(reads[i].getReadBases())); + reads[i].setBaseQualities(BaseUtils.reverse(reads[i].getBaseQualities())); + } + } + catch(CloneNotSupportedException ex) { + throw new StingException("Unable to create aligned read from template."); + } + } + + return reads; } /** @@ -94,52 +124,5 @@ public class BWACAligner { * @param thunkPointer pointer to the C++ object managing BWA/C. * @param bases ASCII representation of byte array. */ - protected native Alignment[] align(long thunkPointer, byte[] bases); - - public static void main(String[] args) { - String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; - BWACAligner thunk = new BWACAligner(prefix + ".ann", - prefix + ".amb", - prefix + ".pac", - prefix + ".bwt", - prefix + ".sa", - prefix + ".rbwt", - prefix + ".rsa"); - - SAMFileReader reader = new SAMFileReader(new File("/Users/mhanna/reference/Ecoli/MV1994.bam")); - reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT); - - int count = 0; - - try{ - System.out.println("Press any key"); - System.in.read(); - } - catch(Exception ex) { - - } - - for(SAMRecord read: reader) { - count++; - //if(count > 1) break; - //if(!read.getReadName().equals("SL-XBC:1:83:664:1077#0")) - // continue; - byte[] bases = read.getReadBases(); - if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); - - Alignment[] alignments = thunk.align(bases); - //System.out.printf("Read: %s: ", read.getReadName()); - //for(Alignment alignment: alignments) - // System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;", - // alignment.getContigIndex(), - // alignment.getAlignmentStart(), - // alignment.isNegativeStrand(), - // alignment.getMappingQuality(), - // alignment.getCigarString()); - if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count); - //System.out.printf("%n"); - } - - thunk.close(); - } + protected native Alignment[] getAlignments(long thunkPointer, byte[] bases); }