Created walkers for alignment, validation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1945 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-10-30 15:04:07 +00:00
parent 51fffc7f69
commit 2d15891719
5 changed files with 171 additions and 53 deletions

View File

@ -60,7 +60,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d
delete bwa;
}
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) {
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) {
BWA* bwa = (BWA*)java_bwa;
const jsize read_length = env->GetArrayLength(java_bases);

View File

@ -25,10 +25,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d
/*
* Class: org_broadinstitute_sting_alignment_bwa_BWACAligner
* Method: align
* Method: getAlignments
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment;
*/
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments
(JNIEnv *, jobject, jlong, jbyteArray);
#ifdef __cplusplus

View File

@ -0,0 +1,72 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.alignment.Alignment;
import net.sf.samtools.SAMRecord;
/**
* Validates alignments against existing reads.
*
* @author mhanna
* @version 0.1
*/
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
private BWACAligner aligner = null;
@Override
public void initialize() {
aligner = new BWACAligner(prefix + ".ann",
prefix + ".amb",
prefix + ".pac",
prefix + ".bwt",
prefix + ".sa",
prefix + ".rbwt",
prefix + ".rsa");
}
public Integer reduceInit() { return 0; }
@Override
public Integer map(char[] ref, SAMRecord read) {
byte[] bases = read.getReadBases();
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
boolean matches = true;
Alignment[] alignments = aligner.getAlignments(bases);
if(alignments.length == 0 && !read.getReadUnmappedFlag())
matches = false;
else {
for(Alignment alignment: alignments) {
matches = (alignment.getAlignmentStart() == read.getAlignmentStart());
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
matches &= (alignment.getCigar().equals(read.getCigar()));
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
if(matches) break;
}
}
if(!matches)
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
return 1;
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
@Override
public void onTraversalDone(Integer result) {
aligner.close();
super.onTraversalDone(result);
}
}

View File

@ -0,0 +1,63 @@
package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import net.sf.samtools.SAMRecord;
import java.util.Random;
/**
* Javadoc goes here.
*
* @author mhanna
* @version 0.1
*/
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
/**
* The actual aligner.
*/
private BWACAligner aligner = null;
/**
* A random number generator, used to generate alignments.
*/
private Random random = new Random();
@Override
public void initialize() {
aligner = new BWACAligner(prefix + ".ann",
prefix + ".amb",
prefix + ".pac",
prefix + ".bwt",
prefix + ".sa",
prefix + ".rbwt",
prefix + ".rsa");
}
public Integer reduceInit() { return 0; }
@Override
public Integer map(char[] ref, SAMRecord read) {
SAMRecord[] alignedReads = aligner.align(read);
SAMRecord selectedRead = alignedReads[random.nextInt(alignedReads.length)];
out.println(selectedRead.format());
return 1;
}
public Integer reduce(Integer value, Integer sum) {
return value + sum;
}
@Override
public void onTraversalDone(Integer result) {
aligner.close();
super.onTraversalDone(result);
}
}

View File

@ -82,10 +82,40 @@ public class BWACAligner {
* @param bases ASCII representation of byte array.
* @return an array of indices into the bwa.
*/
public Alignment[] align(byte[] bases) {
public Alignment[] getAlignments(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
return align(thunkPointer,bases);
return getAlignments(thunkPointer,bases);
}
/**
* Push all alignment data into individual SAMRecords, gaining in convenience but losing some of
* the additional data stored in an alignment object.
* @param read The read to align.
* @return A list of potential alignments.
*/
public SAMRecord[] align(SAMRecord read) {
Alignment[] alignments = getAlignments(read.getReadBases());
SAMRecord[] reads = new SAMRecord[alignments.length];
for(int i = 0; i < alignments.length; i++) {
try {
reads[i] = (SAMRecord)read.clone();
reads[i].setReadUmappedFlag(false);
reads[i].setAlignmentStart((int)alignments[i].getAlignmentStart());
reads[i].setReadNegativeStrandFlag(alignments[i].isNegativeStrand());
reads[i].setMappingQuality(alignments[i].getMappingQuality());
reads[i].setCigar(alignments[i].getCigar());
if(alignments[i].isNegativeStrand()) {
reads[i].setReadBases(BaseUtils.reverse(reads[i].getReadBases()));
reads[i].setBaseQualities(BaseUtils.reverse(reads[i].getBaseQualities()));
}
}
catch(CloneNotSupportedException ex) {
throw new StingException("Unable to create aligned read from template.");
}
}
return reads;
}
/**
@ -94,52 +124,5 @@ public class BWACAligner {
* @param thunkPointer pointer to the C++ object managing BWA/C.
* @param bases ASCII representation of byte array.
*/
protected native Alignment[] align(long thunkPointer, byte[] bases);
public static void main(String[] args) {
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
BWACAligner thunk = new BWACAligner(prefix + ".ann",
prefix + ".amb",
prefix + ".pac",
prefix + ".bwt",
prefix + ".sa",
prefix + ".rbwt",
prefix + ".rsa");
SAMFileReader reader = new SAMFileReader(new File("/Users/mhanna/reference/Ecoli/MV1994.bam"));
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
int count = 0;
try{
System.out.println("Press any key");
System.in.read();
}
catch(Exception ex) {
}
for(SAMRecord read: reader) {
count++;
//if(count > 1) break;
//if(!read.getReadName().equals("SL-XBC:1:83:664:1077#0"))
// continue;
byte[] bases = read.getReadBases();
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
Alignment[] alignments = thunk.align(bases);
//System.out.printf("Read: %s: ", read.getReadName());
//for(Alignment alignment: alignments)
// System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;",
// alignment.getContigIndex(),
// alignment.getAlignmentStart(),
// alignment.isNegativeStrand(),
// alignment.getMappingQuality(),
// alignment.getCigarString());
if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count);
//System.out.printf("%n");
}
thunk.close();
}
protected native Alignment[] getAlignments(long thunkPointer, byte[] bases);
}