From 9422156e091f626f2bdd35d047b3bbe8e9042aa0 Mon Sep 17 00:00:00 2001 From: sjia Date: Thu, 24 Sep 2009 03:12:20 +0000 Subject: [PATCH] Finds closest allele for each read in bam file git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1712 348d0f76-0448-11de-a6fe-93d51630548a --- .../HLAcaller/FindClosestAlleleWalker.java | 66 ++----------------- 1 file changed, 4 insertions(+), 62 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java index 0cbb120a7..9c2c712db 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java @@ -5,6 +5,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.playground.gatk.walkers.HLAcaller.ReadCigarFormatter; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.*; @@ -40,7 +41,7 @@ public class FindClosestAlleleWalker extends ReadWalker { Hashtable AlleleFrequencies = new Hashtable(); int iAstart = -1, iAstop = -1, iBstart = -1, iBstop = -1, iCstart = -1, iCstop = -1; - + ReadCigarFormatter formatter = new ReadCigarFormatter(); public Integer reduceInit() { if (!DatabaseLoaded){ @@ -56,7 +57,7 @@ public class FindClosestAlleleWalker extends ReadWalker { s = strLine.split("\\t"); if (s.length>=10){ //Parse the reads with cigar parser - HLAreads.add(CigarFormatted(s[5],s[9])); + HLAreads.add(formatter.FormatRead(s[5],s[9])); HLAcigars.add(s[5]); HLAnames.add(s[0]); HLApositions.add(s[3]); @@ -120,65 +121,6 @@ public class FindClosestAlleleWalker extends ReadWalker { return 0; } - private String CigarFormatted(String cigar, String read){ - // returns a cigar-formatted sequence (removes insertions, inserts 'D' to where deletions occur - String formattedRead = ""; char c; String count; - int cigarPlaceholder = 0; int subcigarLength = 0; - int readPlaceholder = 0; int subreadLength = 0; - - //reads cigar string - for (int i = 0; i < cigar.length(); i++){ - c = cigar.charAt(i); - if (c == 'M'){ - //If reach M for match/mismatch, get number immediately preceeding 'M' and tack on that many characters to sequence - subcigarLength = i-cigarPlaceholder; - count = cigar.substring(cigarPlaceholder, i); - - subreadLength = Integer.parseInt(count); - formattedRead = formattedRead + read.substring(readPlaceholder, readPlaceholder+subreadLength); - - //increment placeholders - cigarPlaceholder = i+1; - readPlaceholder = readPlaceholder + subreadLength; - } else if (c == 'I'){ - //***NOTE: To be modified later if needed (insertions removed here)*** - - //If reaches I for insertion, get number before 'I' and skip that many characters in sequence - count = cigar.substring(cigarPlaceholder, i); - subreadLength = Integer.parseInt(count); - - //increment placeholders without adding inserted bases to sequence (effectively removes insertion). - cigarPlaceholder = i+1; - readPlaceholder = readPlaceholder + subreadLength; - } else if (c == 'H' || c == 'S'){ - //(H = Headers or S = Soft clipped removed here)*** - - //If reaches H for insertion, get number before 'H' and skip that many characters in sequence - count = cigar.substring(cigarPlaceholder, i); - subreadLength = Integer.parseInt(count); - - //increment cigar placeholder without adding inserted bases to sequence (effectively removes insertion). - cigarPlaceholder = i+1; - } else if (c == 'D'){ - //If reaches D for deletion, insert 'D' into sequence as placeholder - count = cigar.substring(cigarPlaceholder, i); - subreadLength = Integer.parseInt(count); - - //Add one 'D' for each deleted base - String deletion = ""; - for (int j = 1; j <= subreadLength; j++){ - deletion = deletion + "D"; - } - - //update placeholders - formattedRead = formattedRead + deletion; - cigarPlaceholder = i+1; - } - - } - return formattedRead; - } - public Integer map(char[] ref, SAMRecord read) { int readstart = read.getAlignmentStart(); @@ -187,7 +129,7 @@ public class FindClosestAlleleWalker extends ReadWalker { double[] concordance = new double[HLAreads.size()]; double[] numcompared = new double[HLAreads.size()]; double maxConcordance = 0; - String s1 = CigarFormatted(read.getCigarString(), read.getReadString()); + String s1 = formatter.FormatRead(read.getCigarString(), read.getReadString()); char c1, c2; String s2 = "", name = "";