From 72825c4848fe39d6f7a231becaa07cb2dc6c0c63 Mon Sep 17 00:00:00 2001 From: mmelgar Date: Fri, 13 Nov 2009 02:11:23 +0000 Subject: [PATCH] A walker that generates a table of secondary base counts in a bam file. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2031 348d0f76-0448-11de-a6fe-93d51630548a --- .../SecondaryBaseTransitionTableWalker.java | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java new file mode 100755 index 000000000..5fb627ce9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java @@ -0,0 +1,114 @@ +package org.broadinstitute.sting.playground.gatk.walkers.secondaryBases; + +import net.sf.picard.reference.ReferenceSequence; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.IntervalRod; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; + +import javax.xml.transform.Result; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + + +/** + * Created by IntelliJ IDEA. + * User: michael + * Date: Nov 2, 2009 + * Time: 8:45:32 PM + * To change this template use File | Settings | File Templates. + */ +@Reference(window=@Window(start=-1,stop=1)) +public class SecondaryBaseTransitionTableWalker extends LocusWalker { + + HashMap counts = new HashMap(); + public IndexedFastaSequenceFile refSeq; + + /*public void initialize() { + File refFile = this.getToolkit().getArguments().referenceFile; + + try { + refSeq = new IndexedFastaSequenceFile(refFile); + } catch (IOException e) { + refSeq = null; + } + }*/ + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + String refBase = Character.toString(ref.getBase()); + ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context); + String primaryBases = pileup.getBasesWithStrand(); + String secondaryBases = pileup.getSecondaryBasePileup(); + String contextBases = new String(ref.getBases()); + + /*if (refSeq != null) { + long startPos = context.getPosition() - 1; + long stopPos = context.getPosition() + 1; + + ReferenceSequence prevRefSequence = refSeq.getSubsequenceAt(context.getContig(), startPos, context.getPosition() - 1); + ReferenceSequence nextRefSequence = refSeq.getSubsequenceAt(context.getContig(), context.getPosition() + 1, stopPos); + + String prev = new String(prevRefSequence.getBases()); + String next = new String(nextRefSequence.getBases()); + String total = new String(refSeq.getSubsequenceAt(context.getContig(),startPos,stopPos).getBases()); + out.println(total + " " + prev + " " + refBase + " " + next); + }*/ + + String precedingBase = contextBases.substring(0,1); + String nextBase = contextBases.substring(2); + /*out.println(contextBases + " " + precedingBase + " " + refBase + " " + nextBase);*/ + /*out.println(" ");*/ + + boolean rods = false; + for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) { + if (datum != null && !(datum instanceof IntervalRod)) { + rods = true;} + } + + if (!rods && precedingBase != null && secondaryBases != null) { + for (int i = 0; i < primaryBases.length(); i ++) { + if (secondaryBases.charAt(i) != 'N' && secondaryBases.charAt(i) != '.' && Character.toUpperCase(primaryBases.charAt(i)) != 'N') { + String quenchingBase; + if (primaryBases.charAt(i) == Character.toUpperCase(primaryBases.charAt(i))) { + quenchingBase = precedingBase; + } + else { + quenchingBase = nextBase; + } + String reference = Character.toString(Character.toUpperCase(refBase.charAt(0))); + String primary = Character.toString(Character.toUpperCase(primaryBases.charAt(i))); + String quencher = Character.toString(Character.toUpperCase(quenchingBase.charAt(0))); + String secondary = Character.toString(Character.toUpperCase(secondaryBases.charAt(i))); + String key = reference + primary + quencher + secondary; + if (counts.containsKey(key)) { + counts.put(key, counts.get(key) + Long.valueOf(1)); + } + else { + counts.put(key, Long.valueOf(1)); + } + } + } + } + return 1; + } + + public Integer reduceInit() {return 0;} + + public Integer reduce(Integer value, Integer sum) {return sum + value;} + + public void onTraversalDone(Integer result) { + out.println("ReferenceBase \tPrimaryBase \tPreviousBase \tSecondaryBase \tCount"); + for (String key : counts.keySet()) { + out.println(key.charAt(0)+"\t"+key.charAt(1)+"\t"+key.charAt(2)+"\t"+key.charAt(3)+"\t"+counts.get(key)); + } + } +}