From 9dada95ec325fe0b4d206835c9883de4192ca4f4 Mon Sep 17 00:00:00 2001 From: sjia Date: Fri, 31 Jul 2009 16:21:16 +0000 Subject: [PATCH] git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1357 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/HLAcaller/CallHLAWalker.java | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java new file mode 100644 index 000000000..eb5c2bb0a --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CallHLAWalker.java @@ -0,0 +1,163 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; + +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; +import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.gatk.walkers.*; +import java.io.*; +import java.util.*; +/** + * + * @author shermanjia + */ +public class CallHLAWalker extends LocusWalker>{ + @Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false) + public boolean suppressPrinting = false; + + //String HLAdatabaseFile = "/Users/shermanjia/Work/HLA.sam"; + String HLAdatabaseFile ="/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA.sam"; + + + ArrayList HLAreads = new ArrayList(); + ArrayList HLAnames = new ArrayList(); + ArrayList HLApositions = new ArrayList(); + int[] HLAstartpos; + int[] HLAstoppos; + int numHLAlleles = 0; + double[][] Prob; String[][] Alleles; + boolean THREE_BASE_ERRORS = false; + String PRIORS_2ND_ON = "0.000,0.302,0.366,0.142,0.000,0.548,0.370,0.000,0.319,0.000"; + String PRIORS_2ND_OFF = "0.480,0.769,0.744,0.538,0.575,0.727,0.768,0.589,0.762,0.505"; + double[] p2ndon = priorsArray(PRIORS_2ND_ON); + double[] p2ndoff = priorsArray(PRIORS_2ND_OFF); + boolean keepQ0Bases = false; + + //HLAreads.add("Italian Riviera"); + + public Pair reduceInit() { + try{ + FileInputStream fstream = new FileInputStream(HLAdatabaseFile); + // Get the object of DataInputStream + DataInputStream in = new DataInputStream(fstream); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + String strLine; String [] s = null; + //Read File Line By Line + while ((strLine = br.readLine()) != null) { + s = strLine.split("\\t"); + if (s.length>=10){ + HLAreads.add(s[9]); + HLAnames.add(s[0]); + HLApositions.add(s[3]); + //System.out.println (s[0] + "\t" + s[3]); + } + } + in.close(); + int n = HLApositions.size(); + numHLAlleles = n; + HLAstartpos = new int[n]; HLAstoppos = new int[n]; + Prob = new double[n][n]; + for (int i = 0; i < n; i++){ + HLAstartpos[i]=Integer.parseInt(HLApositions.get(i)); + HLAstoppos[i]=HLAstartpos[i]+HLAreads.get(i).length()-1; + for (int j = 0; j (0l,0l); + } + + + public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { + ReadBackedPileup pileup = new ReadBackedPileup(ref, context); + String bases = pileup.getBases(); + long loc = context.getPosition(); + if ( !suppressPrinting ) + out.printf("%s: %d\t", context.getLocation(),pileup.size() ); + out.printf("ref=%s\t", ref); + if( context.getReads().size() > 0 ) { + //out.printf("RG for first read: %s%n",context.getReads().get(0).getReadName()); + int numAs = 0; + int numCs = 0; + int numGs = 0; + int numTs = 0; + char c = 'a'; + + for (int i = 0;i < context.getReads().size();i++){ + //out.printf("%s%s ",i,bases.charAt(i)); + char base = bases.charAt(i); + if (base == 'A'){numAs++;} + if (base == 'C'){numCs++;} + if (base == 'T'){numTs++;} + if (base == 'G'){numGs++;} + + } + out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs); + + GenotypeLikelihoods G = new GenotypeLikelihoods(THREE_BASE_ERRORS,0.999,0.000333,0.000667, p2ndon, p2ndoff, keepQ0Bases); + SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup); + + + double mLikelihoods[] = geno.getLikelihoods(); + List mGenotypes = geno.getGenotypes(); + for (int j =0; j < geno.getBases().length();j++){ + out.printf("%s %s\t",mGenotypes.get(j).getBases(),mLikelihoods[j]); + } + + + for (int j = 0; j < numHLAlleles; j++){ + //out.print(loc + "," + HLAstartpos[j] + "," + HLAstoppos[j]); + long i = loc - HLAstartpos[j]; + if (loc >= HLAstartpos[j] && loc <= HLAstoppos[j]){ + c = HLAreads.get(j).charAt((int) i); + //out.printf("%s",c); + } + } + out.print("\n"); + //out.printf("RG for first read: %s%n",context.getReads().get(0).getReadName()); + + } + return context.getReads().size(); + + } + + private double[] priorsArray(String priorsString) { + String[] pstrs = priorsString.split(","); + double[] pdbls = new double[pstrs.length]; + + for (int i = 0; i < pstrs.length; i++) { + pdbls[i] = Double.valueOf(pstrs[i]); + } + + return pdbls; + } + + public boolean isReduceByInterval() { + return true; + } + + public Pair reduce(Integer value, Pair sum) { + long left = value.longValue() + sum.getFirst(); + long right = sum.getSecond() + 1l; + return new Pair(left, right); + } + + public void onTraversalDone(Pair result) { + out.printf("Average depth of coverage is: %.2f in %d total coverage over %d sites\n", + ((double)result.getFirst() / (double)result.getSecond()), result.getFirst(), result.getSecond()); + } +}