Reads cigar files

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1713 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
sjia 2009-09-24 03:14:10 +00:00
parent 9422156e09
commit 1ee8ba590c
1 changed files with 75 additions and 0 deletions

View File

@ -0,0 +1,75 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
/**
 * Formats a read sequence according to its CIGAR string.
 *
 * The formatted read keeps matched/mismatched bases, drops inserted bases,
 * drops clipped bases, and writes one 'D' character for every deleted base.
 *
 * @author shermanjia
 */
public class ReadCigarFormatter {

    /**
     * Returns the cigar-formatted version of a read.
     *
     * Recognised CIGAR operations:
     * <ul>
     *   <li>M - the next N bases of the read are copied to the output</li>
     *   <li>I - the next N bases of the read are skipped (insertions are removed)</li>
     *   <li>S - the next N bases of the read are skipped (soft-clipped bases are
     *       part of the read sequence, so the read position must move past them)</li>
     *   <li>H - nothing is skipped (hard-clipped bases are not part of the read sequence)</li>
     *   <li>D - N 'D' characters are written to the output; the read position does not move</li>
     * </ul>
     * Any other character is passed over without resetting the start of the current
     * count, so an unrecognised operation letter followed by a recognised one makes
     * the count unparsable and results in a NumberFormatException.
     *
     * @param cigar CIGAR string, e.g. "2S10M1I5M2D8M"
     * @param read  read bases the CIGAR string describes
     * @return the formatted read; empty if the cigar contains no recognised operation
     * @throws NumberFormatException if the text preceding an operation letter is not an integer
     * @throws StringIndexOutOfBoundsException if an M operation runs past the end of the read
     */
    public String FormatRead(String cigar, String read){
        // StringBuilder avoids re-copying the whole sequence on every append
        StringBuilder formattedRead = new StringBuilder();
        int cigarPlaceholder = 0; // index in cigar where the current operation's count starts
        int readPlaceholder = 0;  // index in read of the next base not yet consumed

        for (int i = 0; i < cigar.length(); i++){
            char c = cigar.charAt(i);
            if (c != 'M' && c != 'I' && c != 'S' && c != 'H' && c != 'D'){
                // digits (and anything unrecognised) are skipped until an operation letter is reached
                continue;
            }

            // the count is everything between the previous operation letter and this one
            int count = Integer.parseInt(cigar.substring(cigarPlaceholder, i));

            switch (c){
                case 'M':
                    // match/mismatch: copy the bases and move past them
                    formattedRead.append(read.substring(readPlaceholder, readPlaceholder + count));
                    readPlaceholder += count;
                    break;
                case 'I':
                    //***NOTE: To be modified later if needed (insertions removed here)***
                    // insertion: move past the inserted bases without copying them
                    readPlaceholder += count;
                    break;
                case 'S':
                    // soft clip: the clipped bases are present in the read, so they must be
                    // skipped; otherwise every later operation reads from the wrong offset
                    readPlaceholder += count;
                    break;
                case 'H':
                    // hard clip: the clipped bases are absent from the read, nothing to skip
                    break;
                default:
                    // 'D' - deletion: one placeholder character per deleted base
                    for (int j = 0; j < count; j++){
                        formattedRead.append('D');
                    }
                    break;
            }

            // the next count starts right after this operation letter
            cigarPlaceholder = i + 1;
        }
        return formattedRead.toString();
    }
}