From 0a707a887b0ff43b55140264fda9f1cda264c987 Mon Sep 17 00:00:00 2001 From: kiran Date: Wed, 29 Apr 2009 20:05:36 +0000 Subject: [PATCH] Added ability to evaluate best + random base. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@564 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/ReadErrorRateWalker.java | 76 +++++++++---------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java index f9049b354..6fad04235 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java @@ -4,24 +4,25 @@ import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.BaseUtils; import net.sf.samtools.SAMRecord; import edu.mit.broad.picard.reference.ReferenceSequence; +import java.util.Random; + /** * ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the * read to its home on the reference and noting the mismatch rate. It ignores reads with - * indels in them, treats high and low-quality references bases the same, and does not count + * indels in them, treats high and low-quality reference bases the same, and does not count * ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads * in short order. * * @author Kiran Garimella */ public class ReadErrorRateWalker extends ReadWalker { - @Argument(fullName="useNextBestBase",required=false,defaultValue="false") - public boolean useNextBestBase; - - @Argument(fullName="printVisualHits",required=false,defaultValue="false") - public boolean printVisualHits; + @Argument(fullName="printVisualHits", shortName="v", required=false, defaultValue="false") public boolean printVisualHits; + @Argument(fullName="useNextBestBase", shortName="nb", required=false, defaultValue="false") public boolean useNextBestBase; + @Argument(fullName="useNextRandomBase", shortName="nr", required=false, defaultValue="false") public boolean useNextRandomBase; /** * Ignore reads with indels or clipping @@ -61,19 +62,7 @@ public class ReadErrorRateWalker extends ReadWalker { System.out.println(); for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { - byte compBase; - - switch (contig[offset]) { - case 'A': - case 'a': compBase = 'A'; break; - case 'C': - case 'c': compBase = 'C'; break; - case 'G': - case 'g': compBase = 'G'; break; - case 'T': - case 't': compBase = 'T'; break; - default: compBase = '.'; break; - } + byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); System.out.print((char) compBase); } @@ -81,30 +70,19 @@ public class ReadErrorRateWalker extends ReadWalker { } for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { - byte compBase; - - switch (contig[offset]) { - case 'A': - case 'a': compBase = 'A'; break; - case 'C': - case 'c': compBase = 'C'; break; - case 'G': - case 'g': compBase = 'G'; break; - case 'T': - case 't': compBase = 'T'; break; - default: compBase = '.'; break; - } + byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); if (compBase != '.') { - if (useNextBestBase) { - int nextBestBaseIndex = QualityUtils.compressedQualityToBaseIndex(sq[cycle]); + if (useNextBestBase || useNextRandomBase) { byte nextBestBase; - switch (nextBestBaseIndex) { - case 0: nextBestBase = 'A'; break; - case 1: nextBestBase = 'C'; break; - case 2: nextBestBase = 'G'; break; - case 3: nextBestBase = 'T'; break; - default: nextBestBase = '.'; break; + if (useNextBestBase) { + nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle])); + } else { + nextBestBase = bases[cycle]; + Random generator = new Random(); + while (nextBestBase == bases[cycle]) { + nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4)); + } } if (nextBestBase != '.') { @@ -131,6 +109,24 @@ public class ReadErrorRateWalker extends ReadWalker { return errorsPerCycle; } + private byte convertIUPACBaseToSimpleBase(byte iupacBase) { + char compBase; + + switch (iupacBase) { + case 'A': + case 'a': compBase = 'A'; break; + case 'C': + case 'c': compBase = 'C'; break; + case 'G': + case 'g': compBase = 'G'; break; + case 'T': + case 't': compBase = 'T'; break; + default: compBase = '.'; break; + } + + return (byte) compBase; + } + /** * We don't initialize the array here because we need to know how long the read is first. *