Finds the closest allele for each read in a BAM file
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1712 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5c5151c4e7
commit
9422156e09
|
|
@ -5,6 +5,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.HLAcaller.ReadCigarFormatter;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
|
||||||
|
|
@ -40,7 +41,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
Hashtable AlleleFrequencies = new Hashtable();
|
Hashtable AlleleFrequencies = new Hashtable();
|
||||||
int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1;
|
int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1;
|
||||||
|
ReadCigarFormatter formatter = new ReadCigarFormatter();
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
if (!DatabaseLoaded){
|
if (!DatabaseLoaded){
|
||||||
|
|
@ -56,7 +57,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
s = strLine.split("\\t");
|
s = strLine.split("\\t");
|
||||||
if (s.length>=10){
|
if (s.length>=10){
|
||||||
//Parse the reads with cigar parser
|
//Parse the reads with cigar parser
|
||||||
HLAreads.add(CigarFormatted(s[5],s[9]));
|
HLAreads.add(formatter.FormatRead(s[5],s[9]));
|
||||||
HLAcigars.add(s[5]);
|
HLAcigars.add(s[5]);
|
||||||
HLAnames.add(s[0]);
|
HLAnames.add(s[0]);
|
||||||
HLApositions.add(s[3]);
|
HLApositions.add(s[3]);
|
||||||
|
|
@ -120,65 +121,6 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String CigarFormatted(String cigar, String read){
|
|
||||||
// returns a cigar-formatted sequence (removes insertions, inserts 'D' to where deletions occur
|
|
||||||
String formattedRead = ""; char c; String count;
|
|
||||||
int cigarPlaceholder = 0; int subcigarLength = 0;
|
|
||||||
int readPlaceholder = 0; int subreadLength = 0;
|
|
||||||
|
|
||||||
//reads cigar string
|
|
||||||
for (int i = 0; i < cigar.length(); i++){
|
|
||||||
c = cigar.charAt(i);
|
|
||||||
if (c == 'M'){
|
|
||||||
//If reach M for match/mismatch, get number immediately preceeding 'M' and tack on that many characters to sequence
|
|
||||||
subcigarLength = i-cigarPlaceholder;
|
|
||||||
count = cigar.substring(cigarPlaceholder, i);
|
|
||||||
|
|
||||||
subreadLength = Integer.parseInt(count);
|
|
||||||
formattedRead = formattedRead + read.substring(readPlaceholder, readPlaceholder+subreadLength);
|
|
||||||
|
|
||||||
//increment placeholders
|
|
||||||
cigarPlaceholder = i+1;
|
|
||||||
readPlaceholder = readPlaceholder + subreadLength;
|
|
||||||
} else if (c == 'I'){
|
|
||||||
//***NOTE: To be modified later if needed (insertions removed here)***
|
|
||||||
|
|
||||||
//If reaches I for insertion, get number before 'I' and skip that many characters in sequence
|
|
||||||
count = cigar.substring(cigarPlaceholder, i);
|
|
||||||
subreadLength = Integer.parseInt(count);
|
|
||||||
|
|
||||||
//increment placeholders without adding inserted bases to sequence (effectively removes insertion).
|
|
||||||
cigarPlaceholder = i+1;
|
|
||||||
readPlaceholder = readPlaceholder + subreadLength;
|
|
||||||
} else if (c == 'H' || c == 'S'){
|
|
||||||
//(H = Headers or S = Soft clipped removed here)***
|
|
||||||
|
|
||||||
//If reaches H for insertion, get number before 'H' and skip that many characters in sequence
|
|
||||||
count = cigar.substring(cigarPlaceholder, i);
|
|
||||||
subreadLength = Integer.parseInt(count);
|
|
||||||
|
|
||||||
//increment cigar placeholder without adding inserted bases to sequence (effectively removes insertion).
|
|
||||||
cigarPlaceholder = i+1;
|
|
||||||
} else if (c == 'D'){
|
|
||||||
//If reaches D for deletion, insert 'D' into sequence as placeholder
|
|
||||||
count = cigar.substring(cigarPlaceholder, i);
|
|
||||||
subreadLength = Integer.parseInt(count);
|
|
||||||
|
|
||||||
//Add one 'D' for each deleted base
|
|
||||||
String deletion = "";
|
|
||||||
for (int j = 1; j <= subreadLength; j++){
|
|
||||||
deletion = deletion + "D";
|
|
||||||
}
|
|
||||||
|
|
||||||
//update placeholders
|
|
||||||
formattedRead = formattedRead + deletion;
|
|
||||||
cigarPlaceholder = i+1;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return formattedRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Integer map(char[] ref, SAMRecord read) {
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
int readstart = read.getAlignmentStart();
|
int readstart = read.getAlignmentStart();
|
||||||
|
|
@ -187,7 +129,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
double[] concordance = new double[HLAreads.size()];
|
double[] concordance = new double[HLAreads.size()];
|
||||||
double[] numcompared = new double[HLAreads.size()];
|
double[] numcompared = new double[HLAreads.size()];
|
||||||
double maxConcordance = 0;
|
double maxConcordance = 0;
|
||||||
String s1 = CigarFormatted(read.getCigarString(), read.getReadString());
|
String s1 = formatter.FormatRead(read.getCigarString(), read.getReadString());
|
||||||
char c1, c2;
|
char c1, c2;
|
||||||
String s2 = "", name = "";
|
String s2 = "", name = "";
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue