2010-04-20 07:00:08 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2010 The Broad Institute
|
2010-04-20 23:26:32 +08:00
|
|
|
*
|
2010-04-20 07:00:08 +08:00
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
2010-04-20 23:26:32 +08:00
|
|
|
* files (the "Software"), to deal in the Software without
|
2010-04-20 07:00:08 +08:00
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
2010-04-20 23:26:32 +08:00
|
|
|
*
|
2010-04-20 07:00:08 +08:00
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
2010-04-20 23:26:32 +08:00
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
2010-04-20 07:00:08 +08:00
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
|
|
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package org.broadinstitute.sting.utils.sam;
|
2009-03-25 13:48:10 +08:00
|
|
|
|
2009-06-07 03:35:21 +08:00
|
|
|
import net.sf.samtools.CigarOperator;
|
2009-03-25 13:48:10 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import net.sf.samtools.Cigar;
|
|
|
|
|
import net.sf.samtools.CigarElement;
|
2010-01-28 05:36:42 +08:00
|
|
|
import net.sf.samtools.util.StringUtil;
|
2009-12-13 10:52:18 +08:00
|
|
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
|
|
|
import org.broadinstitute.sting.utils.pileup.*;
|
2010-04-20 07:00:08 +08:00
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
|
|
|
|
import org.broadinstitute.sting.utils.BaseUtils;
|
2009-03-25 13:48:10 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
public class AlignmentUtils {
|
|
|
|
|
|
2010-03-24 23:21:58 +08:00
|
|
|
private static class MismatchCount {
|
|
|
|
|
public int numMismatches = 0;
|
|
|
|
|
public long mismatchQualities = 0;
|
2009-05-12 06:45:11 +08:00
|
|
|
}
|
|
|
|
|
|
2009-06-07 02:07:48 +08:00
|
|
|
/** Returns number of mismatches in the alignment <code>r</code> to the reference sequence
|
|
|
|
|
* <code>refSeq</code> assuming the alignment starts at (ZERO-based) position <code>refIndex</code> on the
|
|
|
|
|
* specified reference sequence; in other words, <code>refIndex</code> is used in place of alignment's own
|
|
|
|
|
* getAlignmentStart() coordinate and the latter is never used. However, the structure of the alignment <code>r</code>
|
|
|
|
|
* (i.e. it's cigar string with all the insertions/deletions it may specify) is fully respected.
|
2010-01-28 05:36:42 +08:00
|
|
|
*
|
|
|
|
|
* THIS CODE ASSUMES THAT ALL BYTES COME FROM UPPERCASED CHARS.
|
2009-06-07 02:07:48 +08:00
|
|
|
*
|
|
|
|
|
* @param r alignment
|
|
|
|
|
* @param refSeq chunk of reference sequence that subsumes the alignment completely (if alignment runs out of
|
|
|
|
|
* the reference string, IndexOutOfBound exception will be thrown at runtime).
|
|
|
|
|
* @param refIndex zero-based position, at which the alignment starts on the specified reference string.
|
2009-12-17 01:28:09 +08:00
|
|
|
* @return the number of mismatches
|
2009-06-07 02:07:48 +08:00
|
|
|
*/
|
2010-01-28 05:36:42 +08:00
|
|
|
public static int numMismatches(SAMRecord r, byte[] refSeq, int refIndex) {
|
2010-03-24 23:21:58 +08:00
|
|
|
return getMismatchCount(r, refSeq, refIndex).numMismatches;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static int numMismatches(SAMRecord r, String refSeq, int refIndex ) {
|
|
|
|
|
if ( r.getReadUnmappedFlag() ) return 1000000;
|
|
|
|
|
return numMismatches(r, StringUtil.stringToBytes(refSeq), refIndex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static long mismatchingQualities(SAMRecord r, byte[] refSeq, int refIndex) {
|
|
|
|
|
return getMismatchCount(r, refSeq, refIndex).mismatchQualities;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static long mismatchingQualities(SAMRecord r, String refSeq, int refIndex ) {
|
|
|
|
|
if ( r.getReadUnmappedFlag() ) return 1000000;
|
|
|
|
|
return numMismatches(r, StringUtil.stringToBytes(refSeq), refIndex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
|
|
|
|
|
MismatchCount mc = new MismatchCount();
|
|
|
|
|
|
2009-06-07 02:07:48 +08:00
|
|
|
int readIdx = 0;
|
2009-12-13 10:52:18 +08:00
|
|
|
byte[] readSeq = r.getReadBases();
|
2009-03-25 13:48:10 +08:00
|
|
|
Cigar c = r.getCigar();
|
2009-06-07 02:07:48 +08:00
|
|
|
for (int i = 0 ; i < c.numCigarElements() ; i++) {
|
|
|
|
|
CigarElement ce = c.getCigarElement(i);
|
|
|
|
|
switch ( ce.getOperator() ) {
|
2009-03-25 13:48:10 +08:00
|
|
|
case M:
|
2009-06-07 02:07:48 +08:00
|
|
|
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
2010-01-28 05:36:42 +08:00
|
|
|
if ( refIndex >= refSeq.length )
|
2009-06-30 00:50:05 +08:00
|
|
|
continue;
|
2010-01-28 05:36:42 +08:00
|
|
|
byte refChr = refSeq[refIndex];
|
|
|
|
|
byte readChr = readSeq[readIdx];
|
2009-07-01 00:08:55 +08:00
|
|
|
// Note: we need to count X/N's as mismatches because that's what SAM requires
|
|
|
|
|
//if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
|
|
|
|
// BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
|
|
|
|
// continue; // do not count Ns/Xs/etc ?
|
2010-03-24 23:21:58 +08:00
|
|
|
if ( readChr != refChr ) {
|
|
|
|
|
mc.numMismatches++;
|
|
|
|
|
mc.mismatchQualities += r.getBaseQualities()[readIdx];
|
|
|
|
|
}
|
2009-03-25 13:48:10 +08:00
|
|
|
}
|
|
|
|
|
break;
|
2009-06-07 02:07:48 +08:00
|
|
|
case I:
|
2010-01-14 11:45:42 +08:00
|
|
|
case S:
|
2009-06-07 02:07:48 +08:00
|
|
|
readIdx += ce.getLength();
|
|
|
|
|
break;
|
|
|
|
|
case D:
|
2010-01-14 11:45:42 +08:00
|
|
|
case N:
|
2009-06-07 02:07:48 +08:00
|
|
|
refIndex += ce.getLength();
|
|
|
|
|
break;
|
2010-01-14 11:45:42 +08:00
|
|
|
default: throw new StingException("The " + ce.getOperator() + " cigar element is not currently supported");
|
2009-03-25 13:48:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
2010-03-24 23:21:58 +08:00
|
|
|
return mc;
|
2009-03-25 13:48:10 +08:00
|
|
|
}
|
|
|
|
|
|
2009-12-13 10:52:18 +08:00
|
|
|
/** Returns the number of mismatches in the pileup within the given reference context.
|
|
|
|
|
*
|
|
|
|
|
* @param pileup the pileup with reads
|
|
|
|
|
* @param ref the reference context
|
|
|
|
|
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
|
2009-12-17 01:28:09 +08:00
|
|
|
* @return the number of mismatches
|
2009-12-13 10:52:18 +08:00
|
|
|
*/
|
2009-12-17 01:28:09 +08:00
|
|
|
public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) {
|
|
|
|
|
int mismatches = 0;
|
|
|
|
|
for ( PileupElement p : pileup )
|
|
|
|
|
mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite);
|
2009-12-13 10:52:18 +08:00
|
|
|
return mismatches;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Returns the number of mismatches in the pileup element within the given reference context.
|
|
|
|
|
*
|
|
|
|
|
* @param p the pileup element
|
|
|
|
|
* @param ref the reference context
|
|
|
|
|
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
|
2009-12-17 01:28:09 +08:00
|
|
|
* @return the number of mismatches
|
2009-12-13 10:52:18 +08:00
|
|
|
*/
|
2009-12-17 01:28:09 +08:00
|
|
|
public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) {
|
2010-03-03 00:40:03 +08:00
|
|
|
return mismatchesInRefWindow(p, ref, ignoreTargetSite, false);
|
|
|
|
|
}
|
2009-12-13 10:52:18 +08:00
|
|
|
|
2010-03-03 00:40:03 +08:00
|
|
|
/** Returns the number of mismatches in the pileup element within the given reference context.
|
|
|
|
|
*
|
|
|
|
|
* @param p the pileup element
|
|
|
|
|
* @param ref the reference context
|
|
|
|
|
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
|
|
|
|
|
* @param qualitySumInsteadOfMismatchCount if true, return the quality score sum of the mismatches rather than the count
|
|
|
|
|
* @return the number of mismatches
|
|
|
|
|
*/
|
|
|
|
|
public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) {
|
|
|
|
|
int sum = 0;
|
2009-12-13 10:52:18 +08:00
|
|
|
|
2009-12-17 05:39:58 +08:00
|
|
|
int windowStart = (int)ref.getWindow().getStart();
|
|
|
|
|
int windowStop = (int)ref.getWindow().getStop();
|
2009-12-13 10:52:18 +08:00
|
|
|
char[] refBases = ref.getBases();
|
|
|
|
|
byte[] readBases = p.getRead().getReadBases();
|
2010-03-03 00:40:03 +08:00
|
|
|
byte[] readQualities = p.getRead().getBaseQualities();
|
2009-12-13 10:52:18 +08:00
|
|
|
Cigar c = p.getRead().getCigar();
|
|
|
|
|
|
|
|
|
|
int readIndex = 0;
|
|
|
|
|
int currentPos = p.getRead().getAlignmentStart();
|
2009-12-17 05:39:58 +08:00
|
|
|
int refIndex = Math.max(0, currentPos - windowStart);
|
2009-12-13 10:52:18 +08:00
|
|
|
|
|
|
|
|
for (int i = 0 ; i < c.numCigarElements() ; i++) {
|
|
|
|
|
CigarElement ce = c.getCigarElement(i);
|
2009-12-17 05:39:58 +08:00
|
|
|
int cigarElementLength = ce.getLength();
|
2009-12-13 10:52:18 +08:00
|
|
|
switch ( ce.getOperator() ) {
|
|
|
|
|
case M:
|
2009-12-17 05:39:58 +08:00
|
|
|
for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) {
|
2009-12-13 10:52:18 +08:00
|
|
|
// are we past the ref window?
|
2009-12-17 05:39:58 +08:00
|
|
|
if ( currentPos > windowStop )
|
2009-12-13 10:52:18 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// are we before the ref window?
|
2009-12-17 05:39:58 +08:00
|
|
|
if ( currentPos < windowStart )
|
2009-12-13 10:52:18 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
char refChr = refBases[refIndex++];
|
|
|
|
|
|
|
|
|
|
// do we need to skip the target site?
|
|
|
|
|
if ( ignoreTargetSite && ref.getLocus().getStart() == currentPos )
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
char readChr = (char)readBases[readIndex];
|
2010-03-03 00:40:03 +08:00
|
|
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
|
|
|
|
sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1;
|
2009-12-13 10:52:18 +08:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case I:
|
2010-01-14 11:45:42 +08:00
|
|
|
case S:
|
2009-12-17 05:39:58 +08:00
|
|
|
readIndex += cigarElementLength;
|
2009-12-13 10:52:18 +08:00
|
|
|
break;
|
|
|
|
|
case D:
|
2010-01-14 11:45:42 +08:00
|
|
|
case N:
|
2009-12-17 05:39:58 +08:00
|
|
|
currentPos += cigarElementLength;
|
|
|
|
|
if ( currentPos > windowStart )
|
|
|
|
|
refIndex += Math.min(cigarElementLength, currentPos - windowStart);
|
2009-12-13 10:52:18 +08:00
|
|
|
break;
|
2010-01-14 11:45:42 +08:00
|
|
|
default:
|
|
|
|
|
// fail silently
|
|
|
|
|
return 0;
|
2009-12-13 10:52:18 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2010-03-03 00:40:03 +08:00
|
|
|
return sum;
|
2009-12-13 10:52:18 +08:00
|
|
|
}
|
|
|
|
|
|
2009-06-07 03:35:21 +08:00
|
|
|
/** Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
|
|
|
|
|
* This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but
|
|
|
|
|
* it only counts blocks without actually allocating and filling the list of blocks themselves. Hence, this method is
|
|
|
|
|
* a much more efficient alternative to r.getAlignmentBlocks.size() in the situations when this number is all that is needed.
|
|
|
|
|
* Formally, this method simply returns the number of M elements in the cigar.
|
|
|
|
|
* @param r alignment
|
|
|
|
|
* @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored).
|
|
|
|
|
*/
|
|
|
|
|
public static int getNumAlignmentBlocks(final SAMRecord r) {
|
|
|
|
|
int n = 0;
|
|
|
|
|
final Cigar cigar = r.getCigar();
|
|
|
|
|
if (cigar == null) return 0;
|
|
|
|
|
|
|
|
|
|
for (final CigarElement e : cigar.getCigarElements()) {
|
|
|
|
|
if (e.getOperator() == CigarOperator.M ) n++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return n;
|
|
|
|
|
}
|
2009-03-25 13:48:10 +08:00
|
|
|
|
|
|
|
|
public static String toString(Cigar cig) {
|
|
|
|
|
StringBuilder b = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
|
|
|
|
char c='?';
|
|
|
|
|
switch ( cig.getCigarElement(i).getOperator() ) {
|
|
|
|
|
case M : c = 'M'; break;
|
|
|
|
|
case D : c = 'D'; break;
|
|
|
|
|
case I : c = 'I'; break;
|
|
|
|
|
}
|
|
|
|
|
b.append(cig.getCigarElement(i).getLength());
|
2009-06-07 03:35:21 +08:00
|
|
|
b.append(c);
|
2009-03-25 13:48:10 +08:00
|
|
|
}
|
|
|
|
|
return b.toString();
|
|
|
|
|
}
|
2009-06-09 00:04:49 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef ) {
|
|
|
|
|
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-24 13:23:29 +08:00
|
|
|
public static String cigarToString(Cigar cig) {
|
|
|
|
|
if ( cig == null )
|
|
|
|
|
return "null";
|
|
|
|
|
|
|
|
|
|
StringBuilder b = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
|
|
|
|
char c='?';
|
|
|
|
|
switch ( cig.getCigarElement(i).getOperator() ) {
|
|
|
|
|
case M : c = 'M'; break;
|
|
|
|
|
case D : c = 'D'; break;
|
|
|
|
|
case I : c = 'I'; break;
|
|
|
|
|
}
|
|
|
|
|
b.append(cig.getCigarElement(i).getLength());
|
|
|
|
|
b.append(c);
|
|
|
|
|
}
|
|
|
|
|
return b.toString();
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-09 00:04:49 +08:00
|
|
|
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
|
|
|
|
|
int readPos = posOnRead;
|
|
|
|
|
int refPos = posOnRef;
|
|
|
|
|
|
|
|
|
|
StringBuilder refLine = new StringBuilder();
|
|
|
|
|
StringBuilder readLine = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
for ( int i = 0 ; i < posOnRead ; i++ ) {
|
|
|
|
|
refLine.append( ref.charAt( refPos - readPos + i ) );
|
|
|
|
|
readLine.append( seq.charAt(i) ) ;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for ( int i = 0 ; i < cigar.numCigarElements() ; i++ ) {
|
|
|
|
|
|
|
|
|
|
final CigarElement ce = cigar.getCigarElement(i);
|
|
|
|
|
|
|
|
|
|
switch(ce.getOperator()) {
|
|
|
|
|
case I:
|
|
|
|
|
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
|
|
|
refLine.append('+');
|
|
|
|
|
readLine.append( seq.charAt( readPos++ ) );
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case D:
|
|
|
|
|
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
|
|
|
readLine.append('*');
|
|
|
|
|
refLine.append( ref.charAt( refPos++ ) );
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case M:
|
|
|
|
|
for ( int j = 0 ; j < ce.getLength(); j++ ) {
|
|
|
|
|
refLine.append(ref.charAt( refPos++ ) );
|
|
|
|
|
readLine.append( seq.charAt( readPos++ ) );
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default: throw new StingException("Unsupported cigar operator: "+ce.getOperator() );
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
refLine.append('\n');
|
|
|
|
|
refLine.append(readLine);
|
|
|
|
|
refLine.append('\n');
|
|
|
|
|
return refLine.toString();
|
|
|
|
|
}
|
2009-09-03 01:00:39 +08:00
|
|
|
|
2010-01-15 23:36:59 +08:00
|
|
|
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
|
|
|
|
|
|
|
|
|
|
final char[] alignment = new char[read.length];
|
|
|
|
|
int refPos = 0;
|
|
|
|
|
int alignPos = 0;
|
|
|
|
|
|
|
|
|
|
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
|
|
|
|
|
|
|
|
|
final CigarElement ce = cigar.getCigarElement(iii);
|
|
|
|
|
|
|
|
|
|
switch( ce.getOperator() ) {
|
|
|
|
|
case I:
|
|
|
|
|
case S:
|
|
|
|
|
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
|
|
|
|
alignment[alignPos++] = '+';
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case D:
|
|
|
|
|
case N:
|
|
|
|
|
refPos++;
|
|
|
|
|
break;
|
|
|
|
|
case M:
|
|
|
|
|
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
|
|
|
|
alignment[alignPos] = ref[refPos];
|
|
|
|
|
alignPos++;
|
|
|
|
|
refPos++;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return alignment;
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-03 01:00:39 +08:00
|
|
|
/**
|
|
|
|
|
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
|
|
|
|
|
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
|
|
|
|
|
* alignment reference index/start.
|
2010-03-24 23:21:58 +08:00
|
|
|
* @param r record
|
|
|
|
|
* @return true if read is unmapped
|
2009-09-03 01:00:39 +08:00
|
|
|
*/
|
|
|
|
|
public static boolean isReadUnmapped(final SAMRecord r) {
|
|
|
|
|
if ( r.getReadUnmappedFlag() ) return true;
|
2009-09-15 00:04:19 +08:00
|
|
|
|
|
|
|
|
// our life would be so much easier if all sam files followed the specs. In reality,
|
|
|
|
|
// sam files (including those generated by maq or bwa) miss headers alltogether. When
|
|
|
|
|
// reading such a SAM file, reference name is set, but since there is no sequence dictionary,
|
|
|
|
|
// null is always returned for referenceIndex. Let's be paranoid here, and make sure that
|
|
|
|
|
// we do not call the read "unmapped" when it has only reference name set with ref. index missing
|
|
|
|
|
// or vice versa.
|
|
|
|
|
if ( ( r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
|
|
|
|
|
|| r.getReferenceName() != null && r.getReferenceName() != SAMRecord.NO_ALIGNMENT_REFERENCE_NAME )
|
|
|
|
|
&& r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) return false ;
|
|
|
|
|
return true;
|
2009-09-03 01:00:39 +08:00
|
|
|
}
|
2010-04-16 08:15:57 +08:00
|
|
|
|
|
|
|
|
/** Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from
|
|
|
|
|
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
|
|
|
|
|
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
|
|
|
|
|
* of read's base qualitites is inverted (in this case new array is allocated and returned).
|
|
|
|
|
* @param read
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static byte [] getQualsInCycleOrder(SAMRecord read) {
|
|
|
|
|
if ( isReadUnmapped(read) || ! read.getReadNegativeStrandFlag() ) return read.getBaseQualities();
|
|
|
|
|
|
|
|
|
|
return BaseUtils.reverse(read.getBaseQualities());
|
|
|
|
|
}
|
2009-03-25 13:48:10 +08:00
|
|
|
}
|