From 385736469c70832152b1c2901c1615f13d437846 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 1 Apr 2009 00:47:47 +0000 Subject: [PATCH] High performance pileup code and utilities git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@242 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/walkers/PileupWalker.java | 14 ++----- .../org/broadinstitute/sting/utils/Utils.java | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index a6063e0d1..4cebc31d7 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -4,9 +4,11 @@ import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.SAMRecord; import java.util.List; +import java.util.ArrayList; /** * Created by IntelliJ IDEA. @@ -26,19 +28,11 @@ public class PileupWalker extends LocusWalker { return true; // We are keeping all the reads } - // Map over the org.broadinstitute.sting.gatk.LocusContext public Integer map(List rodData, char ref, LocusContext context) { List reads = context.getReads(); List offsets = context.getOffsets(); - String bases = ""; - String quals = ""; - for ( int i = 0; i < reads.size(); i++ ) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - - bases += read.getReadString().charAt(offset); - quals += read.getBaseQualityString().charAt(offset); - } + String bases = Utils.basePileupAsString(reads, offsets); + String quals = Utils.qualPileupAsString(reads, offsets); if ( bases.equals("") && FLAG_UNCOVERED_BASES ) { bases = "*** UNCOVERED SITE ***"; diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index 65eaa45c1..09b0e86be 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -118,6 +118,46 @@ public class Utils { return c; } + public static String basePileupAsString( List reads, List offsets ) { + StringBuilder bases = new StringBuilder(); + for ( byte base : basePileup(reads, offsets)) { + bases.append((char)base); + } + return bases.toString(); + } + + public static ArrayList basePileup( List reads, List offsets ) { + ArrayList bases = new ArrayList(reads.size()); + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + bases.add(read.getReadBases()[offset]); + } + return bases; + } + + public static ArrayList qualPileup( List reads, List offsets ) { + ArrayList quals = new ArrayList(reads.size()); + for ( int i = 0; i < reads.size(); i++ ) { + SAMRecord read = reads.get(i); + int offset = offsets.get(i); + byte qual = (byte)read.getBaseQualities()[offset]; + quals.add(qual); + } + return quals; + } + + public static String qualPileupAsString( List reads, List offsets ) { + StringBuilder quals = new StringBuilder(); + for ( int qual : qualPileup(reads, offsets)) { + qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea + char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 + quals.append(qualChar); + } + + return quals.toString(); + } + public static ArrayList subseq(byte[] fullArray) { return subseq(fullArray, 0, fullArray.length); }