diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java index 36e4f39de..ae96fc07e 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -4,11 +4,12 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; import net.sf.samtools.SAMRecord; +import java.util.Iterator; + /** * Validates alignments against existing reads. * @@ -32,11 +33,6 @@ public class AlignmentValidationWalker extends ReadWalker { */ private int count = 0; - /** - * Max delta in mapping quality. - */ - private int maxMapQDelta = 0; - /** * Create an aligner object. The aligner object will load and hold the BWT until close() is called. */ @@ -69,18 +65,18 @@ public class AlignmentValidationWalker extends ReadWalker { if(read.getReadNegativeStrandFlag()) bases = BaseUtils.simpleReverseComplement(bases); boolean matches = true; - Alignment[] alignments = aligner.getAlignments(bases); + Iterator alignments = aligner.getAlignments(bases); - if(alignments.length == 0 && !read.getReadUnmappedFlag()) - matches = false; + if(!alignments.hasNext()) { + matches = read.getReadUnmappedFlag(); + } else { - for(Alignment alignment: alignments) { + Alignment[] alignmentsOfBestQuality = alignments.next(); + for(Alignment alignment: alignmentsOfBestQuality) { matches = (alignment.getContigIndex() == read.getReferenceIndex()); matches &= (alignment.getAlignmentStart() == read.getAlignmentStart()); matches &= (alignment.isNegativeStrand() == read.getReadNegativeStrandFlag()); matches &= (alignment.getCigar().equals(read.getCigar())); - int mapQDelta = Math.abs(alignment.getMappingQuality()-read.getMappingQuality()); - maxMapQDelta = Math.max(mapQDelta,maxMapQDelta); matches &= (alignment.getMappingQuality() == read.getMappingQuality()); if(matches) break; } @@ -94,13 +90,14 @@ public class AlignmentValidationWalker extends ReadWalker { logger.error(String.format(" Negative strand: %b", read.getReadNegativeStrandFlag())); logger.error(String.format(" Cigar: %s%n", read.getCigarString())); logger.error(String.format(" Mapping quality: %s%n", read.getMappingQuality())); - for(int i = 0; i < alignments.length; i++) { + Alignment[] allAlignments = aligner.getAllAlignments(bases); + for(int i = 0; i < allAlignments.length; i++) { logger.error(String.format("Alignment %d:",i)); - logger.error(String.format(" Contig index: %d",alignments[i].getContigIndex())); - logger.error(String.format(" Alignment start: %d", alignments[i].getAlignmentStart())); - logger.error(String.format(" Negative strand: %b", alignments[i].isNegativeStrand())); - logger.error(String.format(" Cigar: %s", alignments[i].getCigarString())); - logger.error(String.format(" Mapping quality: %s%n", alignments[i].getMappingQuality())); + logger.error(String.format(" Contig index: %d",allAlignments[i].getContigIndex())); + logger.error(String.format(" Alignment start: %d", allAlignments[i].getAlignmentStart())); + logger.error(String.format(" Negative strand: %b", allAlignments[i].isNegativeStrand())); + logger.error(String.format(" Cigar: %s", allAlignments[i].getCigarString())); + logger.error(String.format(" Mapping quality: %s%n", allAlignments[i].getMappingQuality())); } throw new StingException(String.format("Read %s mismatches!", read.getReadName())); } @@ -137,7 +134,6 @@ public class AlignmentValidationWalker extends ReadWalker { @Override public void onTraversalDone(Integer result) { aligner.close(); - out.printf("Max mapping quality delta: %s%n", maxMapQDelta); super.onTraversalDone(result); } diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index aacf8bc04..a2f1cbad5 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; import net.sf.samtools.SAMRecord; import java.util.Random; +import java.util.Iterator; /** * Align reads to the reference specified by BWTPrefix. @@ -45,10 +46,17 @@ public class AlignmentWalker extends ReadWalker { */ @Override public Integer map(char[] ref, SAMRecord read) { - SAMRecord[] alignedReads = aligner.align(read); - SAMRecord selectedRead = alignedReads[random.nextInt(alignedReads.length)]; - out.println(selectedRead.format()); - return alignedReads.length; + Iterator alignedReads = aligner.align(read); + if(alignedReads.hasNext()) { + SAMRecord[] bestAlignments = alignedReads.next(); + SAMRecord selectedRead = bestAlignments[random.nextInt(bestAlignments.length)]; + out.println(selectedRead.format()); + return bestAlignments.length; + } + else { + out.println(read.format()); + return 0; + } } /** diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java index 34b4d096c..e56caf0dd 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -6,6 +6,10 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.alignment.bwa.c.BWACConfiguration; +import java.util.Comparator; +import java.util.Arrays; +import java.util.Iterator; + /** * An aligner using the BWA/C implementation. * @@ -50,15 +54,90 @@ public class BWACAligner { destroy(thunkPointer); } + /** + * Get a iterator of alignments, batched by mapping quality. + * @param bases List of bases. + * @return Iterator to alignments. + */ + public Iterator getAlignments(byte[] bases) { + final Alignment[] alignments = getAllAlignments(bases); + return new Iterator() { + /** + * The last position accessed. + */ + private int position = 0; + + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return position < alignments.length; } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public Alignment[] next() { + if(position >= alignments.length) + throw new UnsupportedOperationException("Out of alignments to return."); + int mapQ = alignments[position].getMappingQuality(); + int startingPosition = position; + while(position < alignments.length && alignments[position].getMappingQuality() == mapQ) position++; + return Arrays.copyOfRange(alignments,startingPosition,position); + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; + } + /** * Align the given base array to the BWT. The base array should be in ASCII format. * @param bases ASCII representation of byte array. * @return an array of indices into the bwa. */ - public Alignment[] getAlignments(byte[] bases) { + public Alignment[] getAllAlignments(byte[] bases) { if(thunkPointer == 0) throw new StingException("BWA/C align attempted, but BWA/C was never properly initialized."); - return getAlignments(thunkPointer,bases); + Alignment[] alignments = getAlignments(thunkPointer,bases); + Arrays.sort(alignments,new AlignmentMapQComparator()); + return alignments; + } + + /** + * Get a iterator of aligned reads, batched by mapping quality. + * @param read Read to align. + * @return Iterator to alignments. + */ + public Iterator align(final SAMRecord read) { + final Iterator alignments = getAlignments(read.getReadBases()); + return new Iterator() { + /** + * Whether all alignments have been seen based on the current position. + * @return True if any more alignments are pending. False otherwise. + */ + public boolean hasNext() { return alignments.hasNext(); } + + /** + * Return the next cross-section of alignments, based on mapping quality. + * @return Array of the next set of alignments of a given mapping quality. + */ + public SAMRecord[] next() { + Alignment[] alignmentsOfQuality = alignments.next(); + SAMRecord[] reads = new SAMRecord[alignmentsOfQuality.length]; + for(int i = 0; i < alignmentsOfQuality.length; i++) { + reads[i] = convertAlignmentToRead(read,alignmentsOfQuality[i]); + } + return reads; + } + + /** + * Unsupported. + */ + public void remove() { throw new UnsupportedOperationException("Cannot remove from an alignment iterator"); } + }; } /** @@ -67,30 +146,41 @@ public class BWACAligner { * @param read The read to align. * @return A list of potential alignments. */ - public SAMRecord[] align(SAMRecord read) { - Alignment[] alignments = getAlignments(read.getReadBases()); + public SAMRecord[] alignAll(SAMRecord read) { + Alignment[] alignments = getAllAlignments(read.getReadBases()); SAMRecord[] reads = new SAMRecord[alignments.length]; - for(int i = 0; i < alignments.length; i++) { - try { - reads[i] = (SAMRecord)read.clone(); - reads[i].setReadUmappedFlag(false); - reads[i].setAlignmentStart((int)alignments[i].getAlignmentStart()); - reads[i].setReadNegativeStrandFlag(alignments[i].isNegativeStrand()); - reads[i].setMappingQuality(alignments[i].getMappingQuality()); - reads[i].setCigar(alignments[i].getCigar()); - if(alignments[i].isNegativeStrand()) { - reads[i].setReadBases(BaseUtils.reverse(reads[i].getReadBases())); - reads[i].setBaseQualities(BaseUtils.reverse(reads[i].getBaseQualities())); - } - } - catch(CloneNotSupportedException ex) { - throw new StingException("Unable to create aligned read from template."); - } - } + for(int i = 0; i < alignments.length; i++) + reads[i] = convertAlignmentToRead(read,alignments[i]); return reads; } + /** + * Creates a read directly from an alignment. + * @param unmappedRead Source of the unmapped read. Should have bases, quality scores, and flags. + * @param alignment The target alignment for this read. + * @return A mapped alignment. + */ + public SAMRecord convertAlignmentToRead(SAMRecord unmappedRead, Alignment alignment) { + SAMRecord read = null; + try { + read = (SAMRecord)unmappedRead.clone(); + read.setReadUmappedFlag(false); + read.setAlignmentStart((int)alignment.getAlignmentStart()); + read.setReadNegativeStrandFlag(alignment.isNegativeStrand()); + read.setMappingQuality(alignment.getMappingQuality()); + read.setCigar(alignment.getCigar()); + if(alignment.isNegativeStrand()) { + read.setReadBases(BaseUtils.reverse(read.getReadBases())); + read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); + } + } + catch(CloneNotSupportedException ex) { + throw new StingException("Unable to create aligned read from template."); + } + return read; + } + /** * Caller to the . The base array should be in * ASCII format. @@ -98,4 +188,22 @@ public class BWACAligner { * @param bases ASCII representation of byte array. */ protected native Alignment[] getAlignments(long thunkPointer, byte[] bases); + + /** + * Compares two alignments based on their mapping quality. The one with the highest mapping quality comes FIRST. + */ + private class AlignmentMapQComparator implements Comparator { + /** + * Compares two alignments based on their score. If lhs has a higher mapping quality than + * rhs, the score will be negative. + * @param lhs Left-hand side for comparison. + * @param rhs Right-hand side for comparison. + * @return Negative value if lhs' score is highe + */ + public int compare(Alignment lhs, Alignment rhs) { + // NOTE: this strategy works because Integer.MAX_VALUE, Integer.MIN_VALUE >> than valid range of mapping + // qualities. + return rhs.getMappingQuality() - lhs.getMappingQuality(); + } + } }