Finally reinstate the iterator-style interface. Get rid of some scaffolding code.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2052 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-16 02:34:19 +00:00
parent 103763fc84
commit ce5034dc5d
3 changed files with 156 additions and 44 deletions

View File

@ -4,11 +4,12 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
/**
* Validates alignments against existing reads.
*
@ -32,11 +33,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
*/
private int count = 0;
/**
* Max delta in mapping quality.
*/
private int maxMapQDelta = 0;
/**
* Create an aligner object. The aligner object will load and hold the BWT until close() is called.
*/
@ -69,18 +65,18 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases);
boolean matches = true;
Alignment[] alignments = aligner.getAlignments(bases);
Iterator<Alignment[]> alignments = aligner.getAlignments(bases);
if(alignments.length == 0 && !read.getReadUnmappedFlag())
matches = false;
if(!alignments.hasNext()) {
matches = read.getReadUnmappedFlag();
}
else {
for(Alignment alignment: alignments) {
Alignment[] alignmentsOfBestQuality = alignments.next();
for(Alignment alignment: alignmentsOfBestQuality) {
matches = (alignment.getContigIndex() == read.getReferenceIndex());
matches &= (alignment.getAlignmentStart() == read.getAlignmentStart());
matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag());
matches &= (alignment.getCigar().equals(read.getCigar()));
int mapQDelta = Math.abs(alignment.getMappingQuality()-read.getMappingQuality());
maxMapQDelta = Math.max(mapQDelta,maxMapQDelta);
matches &= (alignment.getMappingQuality() == read.getMappingQuality());
if(matches) break;
}
@ -94,13 +90,14 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag()));
logger.error(String.format(" Cigar: %s%n", read.getCigarString()));
logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality()));
for(int i = 0; i < alignments.length; i++) {
Alignment[] allAlignments = aligner.getAllAlignments(bases);
for(int i = 0; i < allAlignments.length; i++) {
logger.error(String.format("Alignment %d:",i));
logger.error(String.format(" Contig index: %d",alignments[i].getContigIndex()));
logger.error(String.format(" Alignment start: %d", alignments[i].getAlignmentStart()));
logger.error(String.format(" Negative strand: %b", alignments[i].isNegativeStrand()));
logger.error(String.format(" Cigar: %s", alignments[i].getCigarString()));
logger.error(String.format(" Mapping quality: %s%n", alignments[i].getMappingQuality()));
logger.error(String.format(" Contig index: %d",allAlignments[i].getContigIndex()));
logger.error(String.format(" Alignment start: %d", allAlignments[i].getAlignmentStart()));
logger.error(String.format(" Negative strand: %b", allAlignments[i].isNegativeStrand()));
logger.error(String.format(" Cigar: %s", allAlignments[i].getCigarString()));
logger.error(String.format(" Mapping quality: %s%n", allAlignments[i].getMappingQuality()));
}
throw new StingException(String.format("Read %s mismatches!", read.getReadName()));
}
@ -137,7 +134,6 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
@Override
public void onTraversalDone(Integer result) {
aligner.close();
out.printf("Max mapping quality delta: %s%n", maxMapQDelta);
super.onTraversalDone(result);
}

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import net.sf.samtools.SAMRecord;
import java.util.Random;
import java.util.Iterator;
/**
* Align reads to the reference specified by BWTPrefix.
@ -45,10 +46,17 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
SAMRecord[] alignedReads = aligner.align(read);
SAMRecord selectedRead = alignedReads[random.nextInt(alignedReads.length)];
out.println(selectedRead.format());
return alignedReads.length;
Iterator<SAMRecord[]> alignedReads = aligner.align(read);
if(alignedReads.hasNext()) {
SAMRecord[] bestAlignments = alignedReads.next();
SAMRecord selectedRead = bestAlignments[random.nextInt(bestAlignments.length)];
out.println(selectedRead.format());
return bestAlignments.length;
}
else {
out.println(read.format());
return 0;
}
}
/**

View File

@ -6,6 +6,10 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration;
import java.util.Comparator;
import java.util.Arrays;
import java.util.Iterator;
/**
* An aligner using the BWA/C implementation.
*
@ -50,15 +54,90 @@ public class BWACAligner {
destroy(thunkPointer);
}
/**
* Get a iterator of alignments, batched by mapping quality.
* @param bases List of bases.
* @return Iterator to alignments.
*/
public Iterator<Alignment[]> getAlignments(byte[] bases) {
final Alignment[] alignments = getAllAlignments(bases);
return new Iterator<Alignment[]>() {
/**
* The last position accessed.
*/
private int position = 0;
/**
* Whether all alignments have been seen based on the current position.
* @return True if any more alignments are pending. False otherwise.
*/
public boolean hasNext() { return position < alignments.length; }
/**
* Return the next cross-section of alignments, based on mapping quality.
* @return Array of the next set of alignments of a given mapping quality.
*/
public Alignment[] next() {
if(position >= alignments.length)
throw new UnsupportedOperationException("Out of alignments to return.");
int mapQ = alignments[position].getMappingQuality();
int startingPosition = position;
while(position < alignments.length && alignments[position].getMappingQuality() == mapQ) position++;
return Arrays.copyOfRange(alignments,startingPosition,position);
}
/**
* Unsupported.
*/
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
};
}
/**
* Align the given base array to the BWT. The base array should be in ASCII format.
* @param bases ASCII representation of byte array.
* @return an array of indices into the bwa.
*/
public Alignment[] getAlignments(byte[] bases) {
public Alignment[] getAllAlignments(byte[] bases) {
if(thunkPointer == 0)
throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized.");
return getAlignments(thunkPointer,bases);
Alignment[] alignments = getAlignments(thunkPointer,bases);
Arrays.sort(alignments,new AlignmentMapQComparator());
return alignments;
}
/**
* Get a iterator of aligned reads, batched by mapping quality.
* @param read Read to align.
* @return Iterator to alignments.
*/
public Iterator<SAMRecord[]> align(final SAMRecord read) {
final Iterator<Alignment[]> alignments = getAlignments(read.getReadBases());
return new Iterator<SAMRecord[]>() {
/**
* Whether all alignments have been seen based on the current position.
* @return True if any more alignments are pending. False otherwise.
*/
public boolean hasNext() { return alignments.hasNext(); }
/**
* Return the next cross-section of alignments, based on mapping quality.
* @return Array of the next set of alignments of a given mapping quality.
*/
public SAMRecord[] next() {
Alignment[] alignmentsOfQuality = alignments.next();
SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length];
for(int i = 0; i < alignmentsOfQuality.length; i++) {
reads[i] = convertAlignmentToRead(read,alignmentsOfQuality[i]);
}
return reads;
}
/**
* Unsupported.
*/
public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); }
};
}
/**
@ -67,30 +146,41 @@ public class BWACAligner {
* @param read The read to align.
* @return A list of potential alignments.
*/
public SAMRecord[] align(SAMRecord read) {
Alignment[] alignments = getAlignments(read.getReadBases());
public SAMRecord[] alignAll(SAMRecord read) {
Alignment[] alignments = getAllAlignments(read.getReadBases());
SAMRecord[] reads = new SAMRecord[alignments.length];
for(int i = 0; i < alignments.length; i++) {
try {
reads[i] = (SAMRecord)read.clone();
reads[i].setReadUmappedFlag(false);
reads[i].setAlignmentStart((int)alignments[i].getAlignmentStart());
reads[i].setReadNegativeStrandFlag(alignments[i].isNegativeStrand());
reads[i].setMappingQuality(alignments[i].getMappingQuality());
reads[i].setCigar(alignments[i].getCigar());
if(alignments[i].isNegativeStrand()) {
reads[i].setReadBases(BaseUtils.reverse(reads[i].getReadBases()));
reads[i].setBaseQualities(BaseUtils.reverse(reads[i].getBaseQualities()));
}
}
catch(CloneNotSupportedException ex) {
throw new StingException("Unable to create aligned read from template.");
}
}
for(int i = 0; i < alignments.length; i++)
reads[i] = convertAlignmentToRead(read,alignments[i]);
return reads;
}
/**
* Creates a read directly from an alignment.
* @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags.
* @param alignment The target alignment for this read.
* @return A mapped alignment.
*/
public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) {
SAMRecord read = null;
try {
read = (SAMRecord)unmappedRead.clone();
read.setReadUmappedFlag(false);
read.setAlignmentStart((int)alignment.getAlignmentStart());
read.setReadNegativeStrandFlag(alignment.isNegativeStrand());
read.setMappingQuality(alignment.getMappingQuality());
read.setCigar(alignment.getCigar());
if(alignment.isNegativeStrand()) {
read.setReadBases(BaseUtils.reverse(read.getReadBases()));
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
}
}
catch(CloneNotSupportedException ex) {
throw new StingException("Unable to create aligned read from template.");
}
return read;
}
/**
* Caller to the . The base array should be in
* ASCII format.
@ -98,4 +188,22 @@ public class BWACAligner {
* @param bases ASCII representation of byte array.
*/
protected native Alignment[] getAlignments(long thunkPointer, byte[] bases);
/**
* Compares two alignments based on their mapping quality. The one with the highest mapping quality comes FIRST.
*/
private class AlignmentMapQComparator implements Comparator<Alignment> {
/**
* Compares two alignments based on their score. If lhs has a higher mapping quality than
* rhs, the score will be negative.
* @param lhs Left-hand side for comparison.
* @param rhs Right-hand side for comparison.
* @return Negative value if lhs' score is highe
*/
public int compare(Alignment lhs, Alignment rhs) {
// NOTE: this strategy works because Integer.MAX_VALUE, Integer.MIN_VALUE >> than valid range of mapping
// qualities.
return rhs.getMappingQuality() - lhs.getMappingQuality();
}
}
}