diff --git a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
index 3bd3b5350..754a66029 100644
--- a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
@@ -64,6 +64,24 @@ public class AlignmentUtils {
return getMismatchCount(r, refSeq, refIndex).numMismatches;
}
+ /** Same as #numMismatches(SAMRecord, byte[], refIndex), but counts mismatches only along the partial stretch
+ * on the read of length nReadBases starting at (0-based) position readIndex.
+ * @param r Aligned read to count mismatches for
+ * @param refSeq Chunk of reference sequence that subsumes the alignment
+ * @param refIndex Zero-based position on refSeq where the alighnment for the whole read starts
+ * @param readIndex Zero-based position on the read, the mismatches will be counted only from this position on
+ * @param nReadBases Length of continuous stretch on the read, along which mismatches will be counted
+ * @return
+ */
+ public static int numMismatches(SAMRecord r, byte[] refSeq, int refIndex, int readIndex, int nReadBases) {
+ if ( r.getReadUnmappedFlag() ) return 1000000;
+ return getMismatchCount(r, refSeq, refIndex,readIndex,nReadBases).numMismatches;
+ }
+
+ public static long mismatchingQualities(SAMRecord r, byte[] refSeq, int refIndex, int readIndex, int nReadBases) {
+ return getMismatchCount(r, refSeq, refIndex,readIndex,nReadBases).mismatchQualities;
+ }
+
@Deprecated
public static int numMismatches(SAMRecord r, String refSeq, int refIndex ) {
if ( r.getReadUnmappedFlag() ) return 1000000;
@@ -80,21 +98,31 @@ public class AlignmentUtils {
return numMismatches(r, StringUtil.stringToBytes(refSeq), refIndex);
}
+ private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
+ return getMismatchCount(r,refSeq,refIndex,0,r.getReadLength());
+ }
+
// todo -- this code and mismatchesInRefWindow should be combined and optimized into a single
// todo -- high performance implementation. We can do a lot better than this right now
- private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
+ private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
MismatchCount mc = new MismatchCount();
int readIdx = 0;
+ int endOnRead = startOnRead + nReadBases - 1; // index of the last base on read we want to count
byte[] readSeq = r.getReadBases();
Cigar c = r.getCigar();
for (int i = 0 ; i < c.numCigarElements() ; i++) {
+
+ if ( readIdx > endOnRead ) break;
+
CigarElement ce = c.getCigarElement(i);
switch ( ce.getOperator() ) {
case M:
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
if ( refIndex >= refSeq.length )
continue;
+ if ( readIdx < startOnRead ) continue;
+ if ( readIdx > endOnRead ) break;
byte refChr = refSeq[refIndex];
byte readChr = readSeq[readIdx];
// Note: we need to count X/N's as mismatches because that's what SAM requires
@@ -337,6 +365,28 @@ public class AlignmentUtils {
return true;
}
+ /**
+ * Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format
+ * specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and
+ * alignment reference index/start of the mate.
+ * @param r sam record for the read
+ * @return true if read's mate is unmapped
+ */
+ public static boolean isMateUnmapped(final SAMRecord r) {
+ if ( r.getMateUnmappedFlag() ) return true;
+
+ // our life would be so much easier if all sam files followed the specs. In reality,
+ // sam files (including those generated by maq or bwa) miss headers alltogether. When
+ // reading such a SAM file, reference name is set, but since there is no sequence dictionary,
+ // null is always returned for referenceIndex. Let's be paranoid here, and make sure that
+ // we do not call the read "unmapped" when it has only reference name set with ref. index missing
+ // or vice versa.
+ if ( ( r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
+ || r.getMateReferenceName() != null && r.getMateReferenceName() != SAMRecord.NO_ALIGNMENT_REFERENCE_NAME )
+ && r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) return false ;
+ return true;
+ }
+
/** Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array