extractIndels() now should deal correctly with soft- and hard-clipped bases

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@936 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-06-08 16:04:49 +00:00
parent a8a2d0eab9
commit b4ef16ced2
1 changed files with 59 additions and 7 deletions

View File

@ -163,8 +163,8 @@ public class AlignmentUtils {
/** Reads through the alignment cigar and returns all indels found in the alignment as a collection /** Reads through the alignment cigar and returns all indels found in the alignment as a collection
* of Indel objects. * of Indel objects.
* @param c * @param c alignment cigar
* @param start * @param start alignment start. NOTE: if cigar starts with clipped bases (S), alignment start position should be set after them according to SAM format specification.
* @return * @return
*/ */
public static Collection<Indel> extractIndels(final Cigar c, final int start) { public static Collection<Indel> extractIndels(final Cigar c, final int start) {
@ -202,17 +202,20 @@ public class AlignmentUtils {
switch(ce.getOperator()) { switch(ce.getOperator()) {
case I: case I:
curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I); curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I);
if ( i == 0 ) System.out.println("WARNING: Indel at start!"); if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end!"); if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
break; break;
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D); case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
if ( i == 0 ) System.out.println("WARNING: Indel at start!"); if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end!"); if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
runninglength += ce.getLength(); runninglength += ce.getLength();
break; break;
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
case H:
case S: break; // just skip hard and soft-clipped bases; according to SAM format specification, alignment start position on the reference
// points to where the actually aligned (not clipped) bases go, so we do not need to increment reference position here
default : default :
throw new IllegalArgumentException("Unexpected operator in cigar string"); throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
} }
if ( curr_indel == null ) continue; // element was not an indel, go grab next element... if ( curr_indel == null ) continue; // element was not an indel, go grab next element...
@ -269,4 +272,53 @@ public class AlignmentUtils {
} }
return b.toString(); return b.toString();
} }
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef ) {
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
}
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
int readPos = posOnRead;
int refPos = posOnRef;
StringBuilder refLine = new StringBuilder();
StringBuilder readLine = new StringBuilder();
for ( int i = 0 ; i < posOnRead ; i++ ) {
refLine.append( ref.charAt( refPos - readPos + i ) );
readLine.append( seq.charAt(i) ) ;
}
for ( int i = 0 ; i < cigar.numCigarElements() ; i++ ) {
final CigarElement ce = cigar.getCigarElement(i);
switch(ce.getOperator()) {
case I:
for ( int j = 0 ; j < ce.getLength(); j++ ) {
refLine.append('+');
readLine.append( seq.charAt( readPos++ ) );
}
break;
case D:
for ( int j = 0 ; j < ce.getLength(); j++ ) {
readLine.append('*');
refLine.append( ref.charAt( refPos++ ) );
}
break;
case M:
for ( int j = 0 ; j < ce.getLength(); j++ ) {
refLine.append(ref.charAt( refPos++ ) );
readLine.append( seq.charAt( readPos++ ) );
}
break;
default: throw new StingException("Unsupported cigar operator: "+ce.getOperator() );
}
}
refLine.append('\n');
refLine.append(readLine);
refLine.append('\n');
return refLine.toString();
}
} }