Created walkers for alignment, validation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1945 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
51fffc7f69
commit
2d15891719
|
|
@ -60,7 +60,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d
|
|||
delete bwa;
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) {
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments(JNIEnv* env, jobject object, jlong java_bwa, jbyteArray java_bases) {
|
||||
BWA* bwa = (BWA*)java_bwa;
|
||||
|
||||
const jsize read_length = env->GetArrayLength(java_bases);
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_d
|
|||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_alignment_bwa_BWACAligner
|
||||
* Method: align
|
||||
* Method: getAlignments
|
||||
* Signature: (J[B)[Lorg/broadinstitute/sting/alignment/Alignment;
|
||||
*/
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_align
|
||||
JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_BWACAligner_getAlignments
|
||||
(JNIEnv *, jobject, jlong, jbyteArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Validates alignments against existing reads.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
|
||||
private BWACAligner aligner = null;
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
aligner = new BWACAligner(prefix + ".ann",
|
||||
prefix + ".amb",
|
||||
prefix + ".pac",
|
||||
prefix + ".bwt",
|
||||
prefix + ".sa",
|
||||
prefix + ".rbwt",
|
||||
prefix + ".rsa");
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
byte[] bases = read.getReadBases();
|
||||
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
boolean matches = true;
|
||||
Alignment[] alignments = aligner.getAlignments(bases);
|
||||
|
||||
if(alignments.length == 0 && !read.getReadUnmappedFlag())
|
||||
matches = false;
|
||||
else {
|
||||
for(Alignment alignment: alignments) {
|
||||
matches = (alignment.getAlignmentStart() == read.getAlignmentStart());
|
||||
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
|
||||
matches &= (alignment.getCigar().equals(read.getCigar()));
|
||||
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
|
||||
if(matches) break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!matches)
|
||||
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Javadoc goes here.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false)
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
|
||||
/**
|
||||
* The actual aligner.
|
||||
*/
|
||||
private BWACAligner aligner = null;
|
||||
|
||||
/**
|
||||
* A random number generator, used to generate alignments.
|
||||
*/
|
||||
private Random random = new Random();
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
aligner = new BWACAligner(prefix + ".ann",
|
||||
prefix + ".amb",
|
||||
prefix + ".pac",
|
||||
prefix + ".bwt",
|
||||
prefix + ".sa",
|
||||
prefix + ".rbwt",
|
||||
prefix + ".rsa");
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
SAMRecord[] alignedReads = aligner.align(read);
|
||||
SAMRecord selectedRead = alignedReads[random.nextInt(alignedReads.length)];
|
||||
out.println(selectedRead.format());
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -82,10 +82,40 @@ public class BWACAligner {
|
|||
* @param bases ASCII representation of byte array.
|
||||
* @return an array of indices into the bwa.
|
||||
*/
|
||||
public Alignment[] align(byte[] bases) {
|
||||
public Alignment[] getAlignments(byte[] bases) {
|
||||
if(thunkPointer == 0)
|
||||
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
|
||||
return align(thunkPointer,bases);
|
||||
return getAlignments(thunkPointer,bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Push all alignment data into individual SAMRecords, gaining in convenience but losing some of
|
||||
* the additional data stored in an alignment object.
|
||||
* @param read The read to align.
|
||||
* @return A list of potential alignments.
|
||||
*/
|
||||
public SAMRecord[] align(SAMRecord read) {
|
||||
Alignment[] alignments = getAlignments(read.getReadBases());
|
||||
SAMRecord[] reads = new SAMRecord[alignments.length];
|
||||
for(int i = 0; i < alignments.length; i++) {
|
||||
try {
|
||||
reads[i] = (SAMRecord)read.clone();
|
||||
reads[i].setReadUmappedFlag(false);
|
||||
reads[i].setAlignmentStart((int)alignments[i].getAlignmentStart());
|
||||
reads[i].setReadNegativeStrandFlag(alignments[i].isNegativeStrand());
|
||||
reads[i].setMappingQuality(alignments[i].getMappingQuality());
|
||||
reads[i].setCigar(alignments[i].getCigar());
|
||||
if(alignments[i].isNegativeStrand()) {
|
||||
reads[i].setReadBases(BaseUtils.reverse(reads[i].getReadBases()));
|
||||
reads[i].setBaseQualities(BaseUtils.reverse(reads[i].getBaseQualities()));
|
||||
}
|
||||
}
|
||||
catch(CloneNotSupportedException ex) {
|
||||
throw new StingException("Unable to create aligned read from template.");
|
||||
}
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -94,52 +124,5 @@ public class BWACAligner {
|
|||
* @param thunkPointer pointer to the C++ object managing BWA/C.
|
||||
* @param bases ASCII representation of byte array.
|
||||
*/
|
||||
protected native Alignment[] align(long thunkPointer, byte[] bases);
|
||||
|
||||
public static void main(String[] args) {
|
||||
String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta";
|
||||
BWACAligner thunk = new BWACAligner(prefix + ".ann",
|
||||
prefix + ".amb",
|
||||
prefix + ".pac",
|
||||
prefix + ".bwt",
|
||||
prefix + ".sa",
|
||||
prefix + ".rbwt",
|
||||
prefix + ".rsa");
|
||||
|
||||
SAMFileReader reader = new SAMFileReader(new File("/Users/mhanna/reference/Ecoli/MV1994.bam"));
|
||||
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
int count = 0;
|
||||
|
||||
try{
|
||||
System.out.println("Press any key");
|
||||
System.in.read();
|
||||
}
|
||||
catch(Exception ex) {
|
||||
|
||||
}
|
||||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
//if(count > 1) break;
|
||||
//if(!read.getReadName().equals("SL-XBC:1:83:664:1077#0"))
|
||||
// continue;
|
||||
byte[] bases = read.getReadBases();
|
||||
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
Alignment[] alignments = thunk.align(bases);
|
||||
//System.out.printf("Read: %s: ", read.getReadName());
|
||||
//for(Alignment alignment: alignments)
|
||||
// System.out.printf("tig = %d; pos = %d, neg strand = %b, mapQ = %d, cigar = %s;",
|
||||
// alignment.getContigIndex(),
|
||||
// alignment.getAlignmentStart(),
|
||||
// alignment.isNegativeStrand(),
|
||||
// alignment.getMappingQuality(),
|
||||
// alignment.getCigarString());
|
||||
if(count % 10000 == 0) System.out.printf("Processed %d reads.%n",count);
|
||||
//System.out.printf("%n");
|
||||
}
|
||||
|
||||
thunk.close();
|
||||
}
|
||||
protected native Alignment[] getAlignments(long thunkPointer, byte[] bases);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue