Reads cigar files
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1713 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9422156e09
commit
1ee8ba590c
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||
|
||||
/**
|
||||
* Returns formatted read given read string and cigar string
|
||||
* Essentially removes hard-clipped (H) bases, soft-clipped (S) bases, and currently removes insertions
|
||||
* Deletions coded as "D"
|
||||
*
|
||||
* @author shermanjia
|
||||
*/
|
||||
public class ReadCigarFormatter {

    /**
     * Returns the read laid out against the reference according to its CIGAR string.
     *
     * <p>The CIGAR is scanned left to right; each operation is a decimal length followed by a
     * one-letter code:
     * <ul>
     *   <li>{@code M}, {@code =}, {@code X} - aligned bases: copied from the read to the output.</li>
     *   <li>{@code I} - insertion: the bases are skipped in the read and not emitted
     *       (insertions are currently dropped).</li>
     *   <li>{@code S} - soft clip: the bases are present in the read, so they are skipped and
     *       not emitted.</li>
     *   <li>{@code H} - hard clip: the bases are absent from the read, so nothing is consumed.</li>
     *   <li>{@code D} - deletion: one {@code 'D'} character is emitted per deleted base.</li>
     * </ul>
     * Any other character is treated as part of the next operation's length, so an unsupported
     * operation code surfaces as a {@link NumberFormatException} when that length is parsed.
     *
     * @param cigar CIGAR string describing the alignment, e.g. {@code "2S10M1D5M"}
     * @param read  read bases as they appear in the alignment record
     * @return the formatted read; empty if the CIGAR contains no supported operation
     * @throws NumberFormatException if an operation is not preceded by a valid integer length
     * @throws StringIndexOutOfBoundsException if the CIGAR consumes more bases than the read has
     */
    public String FormatRead(String cigar, String read) {
        // StringBuilder avoids re-copying the whole result on every appended segment.
        StringBuilder formattedRead = new StringBuilder();
        int countStart = 0; // index in cigar where the pending operation's length begins
        int readPos = 0;    // index of the next unconsumed base in read

        for (int i = 0; i < cigar.length(); i++) {
            char op = cigar.charAt(i);
            switch (op) {
                case 'M':
                case '=':
                case 'X': {
                    // Aligned segment: copy the bases through unchanged.
                    int length = parseLength(cigar, countStart, i);
                    formattedRead.append(read.substring(readPos, readPos + length));
                    readPos += length;
                    countStart = i + 1;
                    break;
                }
                case 'I':
                case 'S': {
                    // Inserted and soft-clipped bases both occupy positions in the read but
                    // have no reference position, so step over them without emitting anything.
                    readPos += parseLength(cigar, countStart, i);
                    countStart = i + 1;
                    break;
                }
                case 'H': {
                    // Hard-clipped bases were already removed from the read: nothing to skip.
                    // The length is still parsed so a malformed count is reported.
                    parseLength(cigar, countStart, i);
                    countStart = i + 1;
                    break;
                }
                case 'D': {
                    // Deleted bases exist only in the reference; mark each one with a 'D'.
                    int length = parseLength(cigar, countStart, i);
                    for (int j = 0; j < length; j++) {
                        formattedRead.append('D');
                    }
                    countStart = i + 1;
                    break;
                }
                default:
                    // Digit (or unsupported code): left in place as part of the pending length.
                    break;
            }
        }
        return formattedRead.toString();
    }

    /** Parses the operation length stored in {@code cigar[from, to)}. */
    private static int parseLength(String cigar, int from, int to) {
        return Integer.parseInt(cigar.substring(from, to));
    }
}
|
||||
Loading…
Reference in New Issue