Oops. Slight twiddle of the math here so that I'm not asking if bestBase == nextBestBase.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@336 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d4ab95c098
commit
0e7d962eca
|
|
@ -8,16 +8,41 @@ import net.sf.samtools.SAMRecord;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing
|
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
|
||||||
|
* read to its home on the reference and noting the mismatch rate. It ignores reads with
|
||||||
|
* indels in them, treats high and low-quality references bases the same, and does not count
|
||||||
|
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
|
||||||
|
* in short order.
|
||||||
|
*
|
||||||
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
||||||
@Argument(fullName="useNextBestBase",required=false,defaultValue="false")
|
@Argument(fullName="useNextBestBase",required=false,defaultValue="false")
|
||||||
public boolean useNextBestBase;
|
public boolean useNextBestBase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ignore reads with indels or clipping
|
||||||
|
*
|
||||||
|
* @param context the reference context
|
||||||
|
* @param read the read to assess
|
||||||
|
* @return true if the read can be processed, false if it should be ignored
|
||||||
|
*/
|
||||||
public boolean filter(LocusContext context, SAMRecord read) {
|
public boolean filter(LocusContext context, SAMRecord read) {
|
||||||
return (read.getCigar().numCigarElements() == 1);
|
return (read.getCigar().numCigarElements() == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each read, return a boolean array indicating the locations of the mismatch.
|
||||||
|
* Length of the array is one element longer than the read length. The last element
|
||||||
|
* of this array is always "true" so that we can figure out how many reads we
|
||||||
|
* processed in a thread-safe manner.
|
||||||
|
*
|
||||||
|
* @param context the reference context
|
||||||
|
* @param read the read to assess
|
||||||
|
* @return An array of length (read_length + 1) indicating where the mismatches occur.
|
||||||
|
* Last element is for internal use so the reduce() function can figure out how
|
||||||
|
* many reads we processed.
|
||||||
|
*/
|
||||||
public boolean[] map(LocusContext context, SAMRecord read) {
|
public boolean[] map(LocusContext context, SAMRecord read) {
|
||||||
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
|
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
|
||||||
|
|
||||||
|
|
@ -55,21 +80,23 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nextBestBase != '.') {
|
if (nextBestBase != '.') {
|
||||||
errorsPerCycle[cycle] = !(bases[cycle] == compBase || bases[cycle] == nextBestBase);
|
errorsPerCycle[cycle] = !(bases[cycle] == compBase || nextBestBase == compBase);
|
||||||
totalMismatches = !(bases[cycle] == compBase || bases[cycle] == nextBestBase) ? 1 : 0;
|
totalMismatches = (!(bases[cycle] == compBase || nextBestBase == compBase)) ? 1 : 0;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
errorsPerCycle[cycle] = (bases[cycle] != compBase);
|
errorsPerCycle[cycle] = !(bases[cycle] == compBase);
|
||||||
totalMismatches += (bases[cycle] != compBase) ? 1 : 0;
|
totalMismatches += (!(bases[cycle] == compBase)) ? 1 : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
if (totalMismatches > 4) {
|
if (totalMismatches > 4) {
|
||||||
for (int cycle = 0; cycle < bases.length; cycle++) { System.out.print((char) bases[cycle]); } System.out.print("\n");
|
for (int cycle = 0; cycle < bases.length; cycle++) { System.out.print((char) bases[cycle]); } System.out.print("\n");
|
||||||
for (int cycle = 0, offset = (int) context.getPosition(); cycle < bases.length; cycle++, offset++) { System.out.print((char) contig[offset]); } System.out.print("\n");
|
for (int cycle = 0, offset = (int) context.getPosition(); cycle < bases.length; cycle++, offset++) { System.out.print((char) contig[offset]); } System.out.print("\n");
|
||||||
System.out.println(totalMismatches + "\n");
|
System.out.println(totalMismatches + "\n");
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// We encode that we saw a read in the last position of the array.
|
// We encode that we saw a read in the last position of the array.
|
||||||
// That way we know what to normalize by, and we get thread safety!
|
// That way we know what to normalize by, and we get thread safety!
|
||||||
|
|
@ -78,10 +105,22 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
||||||
return errorsPerCycle;
|
return errorsPerCycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We don't initialize the array here because we need to know how long the read is first.
|
||||||
|
*
|
||||||
|
* @return null
|
||||||
|
*/
|
||||||
public int[] reduceInit() {
|
public int[] reduceInit() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Summarize the error rate data.
|
||||||
|
*
|
||||||
|
* @param value the read mismatch array
|
||||||
|
* @param sum the summed mismatch array
|
||||||
|
* @return the summed mismatch array with the new read mismatch array added
|
||||||
|
*/
|
||||||
public int[] reduce(boolean[] value, int[] sum) {
|
public int[] reduce(boolean[] value, int[] sum) {
|
||||||
if (sum == null) {
|
if (sum == null) {
|
||||||
sum = new int[value.length];
|
sum = new int[value.length];
|
||||||
|
|
@ -94,11 +133,15 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We've processed all the reads. Emit the final, normalized error rate data.
|
||||||
|
*
|
||||||
|
* @param sum the summed mismatch array
|
||||||
|
*/
|
||||||
public void onTraversalDone(int[] sum) {
|
public void onTraversalDone(int[] sum) {
|
||||||
for (int cycle = 0; cycle < sum.length - 1; cycle++) {
|
for (int cycle = 0; cycle < sum.length - 1; cycle++) {
|
||||||
double errorrate = ((double) sum[cycle])/((double) sum[sum.length - 1]);
|
double errorrate = ((double) sum[cycle])/((double) sum[sum.length - 1]);
|
||||||
System.out.println("[ERROR_RATE] " + cycle + " " + errorrate);
|
System.out.println("[ERROR_RATE] " + cycle + " " + errorrate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue