Added ability to evaluate best + random base.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@564 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-04-29 20:05:36 +00:00
parent 334f158e5a
commit 0a707a887b
1 changed files with 36 additions and 40 deletions

View File

@ -4,24 +4,25 @@ import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord;
import edu.mit.broad.picard.reference.ReferenceSequence;
import java.util.Random;
/**
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
* read to its home on the reference and noting the mismatch rate. It ignores reads with
* indels in them, treats high and low-quality references bases the same, and does not count
* indels in them, treats high and low-quality reference bases the same, and does not count
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
* in short order.
*
* @author Kiran Garimella
*/
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
@Argument(fullName="useNextBestBase",required=false,defaultValue="false")
public boolean useNextBestBase;
@Argument(fullName="printVisualHits",required=false,defaultValue="false")
public boolean printVisualHits;
@Argument(fullName="printVisualHits", shortName="v", required=false, defaultValue="false") public boolean printVisualHits;
@Argument(fullName="useNextBestBase", shortName="nb", required=false, defaultValue="false") public boolean useNextBestBase;
@Argument(fullName="useNextRandomBase", shortName="nr", required=false, defaultValue="false") public boolean useNextRandomBase;
/**
* Ignore reads with indels or clipping
@ -61,19 +62,7 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
System.out.println();
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
byte compBase;
switch (contig[offset]) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
System.out.print((char) compBase);
}
@ -81,30 +70,19 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
}
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
byte compBase;
switch (contig[offset]) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
if (compBase != '.') {
if (useNextBestBase) {
int nextBestBaseIndex = QualityUtils.compressedQualityToBaseIndex(sq[cycle]);
if (useNextBestBase || useNextRandomBase) {
byte nextBestBase;
switch (nextBestBaseIndex) {
case 0: nextBestBase = 'A'; break;
case 1: nextBestBase = 'C'; break;
case 2: nextBestBase = 'G'; break;
case 3: nextBestBase = 'T'; break;
default: nextBestBase = '.'; break;
if (useNextBestBase) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]));
} else {
nextBestBase = bases[cycle];
Random generator = new Random();
while (nextBestBase == bases[cycle]) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
}
}
if (nextBestBase != '.') {
@ -131,6 +109,24 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
return errorsPerCycle;
}
private byte convertIUPACBaseToSimpleBase(byte iupacBase) {
char compBase;
switch (iupacBase) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
return (byte) compBase;
}
/**
* We don't initialize the array here because we need to know how long the read is first.
*