Oops. Slight twiddle of the math here so that I'm not asking if bestBase == nextBestBase.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@336 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d4ab95c098
commit
0e7d962eca
|
|
@ -8,16 +8,41 @@ import net.sf.samtools.SAMRecord;
|
|||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
|
||||
/**
|
||||
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing
|
||||
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
|
||||
* read to its home on the reference and noting the mismatch rate. It ignores reads with
|
||||
* indels in them, treats high and low-quality references bases the same, and does not count
|
||||
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
|
||||
* in short order.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
*/
|
||||
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
||||
@Argument(fullName="useNextBestBase",required=false,defaultValue="false")
|
||||
public boolean useNextBestBase;
|
||||
|
||||
/**
|
||||
* Ignore reads with indels or clipping
|
||||
*
|
||||
* @param context the reference context
|
||||
* @param read the read to assess
|
||||
* @return true if the read can be processed, false if it should be ignored
|
||||
*/
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
return (read.getCigar().numCigarElements() == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* For each read, return a boolean array indicating the locations of the mismatch.
|
||||
* Length of the array is one element longer than the read length. The last element
|
||||
* of this array is always "true" so that we can figure out how many reads we
|
||||
* processed in a thread-safe manner.
|
||||
*
|
||||
* @param context the reference context
|
||||
* @param read the read to assess
|
||||
* @return An array of length (read_length + 1) indicating where the mismatches occur.
|
||||
* Last element is for internal use so the reduce() function can figure out how
|
||||
* many reads we processed.
|
||||
*/
|
||||
public boolean[] map(LocusContext context, SAMRecord read) {
|
||||
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
|
||||
|
||||
|
|
@ -55,21 +80,23 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
}
|
||||
|
||||
if (nextBestBase != '.') {
|
||||
errorsPerCycle[cycle] = !(bases[cycle] == compBase || bases[cycle] == nextBestBase);
|
||||
totalMismatches = !(bases[cycle] == compBase || bases[cycle] == nextBestBase) ? 1 : 0;
|
||||
errorsPerCycle[cycle] = !(bases[cycle] == compBase || nextBestBase == compBase);
|
||||
totalMismatches = (!(bases[cycle] == compBase || nextBestBase == compBase)) ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
errorsPerCycle[cycle] = (bases[cycle] != compBase);
|
||||
totalMismatches += (bases[cycle] != compBase) ? 1 : 0;
|
||||
errorsPerCycle[cycle] = !(bases[cycle] == compBase);
|
||||
totalMismatches += (!(bases[cycle] == compBase)) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (totalMismatches > 4) {
|
||||
for (int cycle = 0; cycle < bases.length; cycle++) { System.out.print((char) bases[cycle]); } System.out.print("\n");
|
||||
for (int cycle = 0, offset = (int) context.getPosition(); cycle < bases.length; cycle++, offset++) { System.out.print((char) contig[offset]); } System.out.print("\n");
|
||||
System.out.println(totalMismatches + "\n");
|
||||
}
|
||||
*/
|
||||
|
||||
// We encode that we saw a read in the last position of the array.
|
||||
// That way we know what to normalize by, and we get thread safety!
|
||||
|
|
@ -78,10 +105,22 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
return errorsPerCycle;
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't initialize the array here because we need to know how long the read is first.
|
||||
*
|
||||
* @return null
|
||||
*/
|
||||
public int[] reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarize the error rate data.
|
||||
*
|
||||
* @param value the read mismatch array
|
||||
* @param sum the summed mismatch array
|
||||
* @return the summed mismatch array with the new read mismatch array added
|
||||
*/
|
||||
public int[] reduce(boolean[] value, int[] sum) {
|
||||
if (sum == null) {
|
||||
sum = new int[value.length];
|
||||
|
|
@ -94,11 +133,15 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* We've processed all the reads. Emit the final, normalized error rate data.
|
||||
*
|
||||
* @param sum the summed mismatch array
|
||||
*/
|
||||
public void onTraversalDone(int[] sum) {
|
||||
for (int cycle = 0; cycle < sum.length - 1; cycle++) {
|
||||
double errorrate = ((double) sum[cycle])/((double) sum[sum.length - 1]);
|
||||
System.out.println("[ERROR_RATE] " + cycle + " " + errorrate);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue