Added ability to evaluate best + random base.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@564 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-04-29 20:05:36 +00:00
parent 334f158e5a
commit 0a707a887b
1 changed files with 36 additions and 40 deletions

View File

@ -4,24 +4,25 @@ import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import edu.mit.broad.picard.reference.ReferenceSequence; import edu.mit.broad.picard.reference.ReferenceSequence;
import java.util.Random;
/** /**
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the * ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
* read to its home on the reference and noting the mismatch rate. It ignores reads with * read to its home on the reference and noting the mismatch rate. It ignores reads with
* indels in them, treats high and low-quality references bases the same, and does not count * indels in them, treats high and low-quality reference bases the same, and does not count
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads * ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
* in short order. * in short order.
* *
* @author Kiran Garimella * @author Kiran Garimella
*/ */
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> { public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
@Argument(fullName="useNextBestBase",required=false,defaultValue="false") @Argument(fullName="printVisualHits", shortName="v", required=false, defaultValue="false") public boolean printVisualHits;
public boolean useNextBestBase; @Argument(fullName="useNextBestBase", shortName="nb", required=false, defaultValue="false") public boolean useNextBestBase;
@Argument(fullName="useNextRandomBase", shortName="nr", required=false, defaultValue="false") public boolean useNextRandomBase;
@Argument(fullName="printVisualHits",required=false,defaultValue="false")
public boolean printVisualHits;
/** /**
* Ignore reads with indels or clipping * Ignore reads with indels or clipping
@ -61,19 +62,7 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
System.out.println(); System.out.println();
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
byte compBase; byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
switch (contig[offset]) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
System.out.print((char) compBase); System.out.print((char) compBase);
} }
@ -81,30 +70,19 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
} }
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
byte compBase; byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
switch (contig[offset]) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
if (compBase != '.') { if (compBase != '.') {
if (useNextBestBase) { if (useNextBestBase || useNextRandomBase) {
int nextBestBaseIndex = QualityUtils.compressedQualityToBaseIndex(sq[cycle]);
byte nextBestBase; byte nextBestBase;
switch (nextBestBaseIndex) { if (useNextBestBase) {
case 0: nextBestBase = 'A'; break; nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]));
case 1: nextBestBase = 'C'; break; } else {
case 2: nextBestBase = 'G'; break; nextBestBase = bases[cycle];
case 3: nextBestBase = 'T'; break; Random generator = new Random();
default: nextBestBase = '.'; break; while (nextBestBase == bases[cycle]) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
}
} }
if (nextBestBase != '.') { if (nextBestBase != '.') {
@ -131,6 +109,24 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
return errorsPerCycle; return errorsPerCycle;
} }
private byte convertIUPACBaseToSimpleBase(byte iupacBase) {
char compBase;
switch (iupacBase) {
case 'A':
case 'a': compBase = 'A'; break;
case 'C':
case 'c': compBase = 'C'; break;
case 'G':
case 'g': compBase = 'G'; break;
case 'T':
case 't': compBase = 'T'; break;
default: compBase = '.'; break;
}
return (byte) compBase;
}
/** /**
* We don't initialize the array here because we need to know how long the read is first. * We don't initialize the array here because we need to know how long the read is first.
* *