Added walker to print out a histogram of where mismatches occur in alignments

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@89 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-03-18 19:46:42 +00:00
parent 1096bbd4d9
commit 45d2a9acd8
2 changed files with 92 additions and 4 deletions

View File

@ -19,8 +19,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
}
public void initialize() {
for ( int i = 0; i < this.alignCounts.length; i++ ) {
this.alignCounts[i] = 0;
for ( int i = 0; i < alignCounts.length; i++ ) {
alignCounts[i] = 0;
}
}
@ -36,7 +36,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) {
//System.out.println(read.getAttribute("NM"));
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
this.alignCounts[editDist]++;
if (editDist <= 50)
alignCounts[editDist]++;
return 1;
}
@ -48,7 +49,7 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
public void onTraversalDone() {
int curTotal = 0;
for ( int i = 0; i < this.alignCounts.length; i++ ) {
for ( int i = 0; i < alignCounts.length; i++ ) {
curTotal += alignCounts[i];
System.out.printf("%3d %10d%n", i, curTotal);
}

View File

@ -0,0 +1,87 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.Utils;
import edu.mit.broad.picard.reference.ReferenceSequence;
import java.util.Iterator;
import java.util.List;
import static java.lang.reflect.Array.*;
/**
 * Read walker that tallies, per offset within the aligned span, how many
 * reads disagree with the reference at that offset, and prints the tallies
 * once the traversal is finished.
 *
 * Only mapped reads that consist of a single alignment block and whose NM
 * (edit distance) tag lies within
 * [MIN_TARGET_EDIT_DISTANCE, MAX_TARGET_EDIT_DISTANCE] contribute to the
 * histogram. Every read that passes the filter still counts as 1 toward the
 * reduce total.
 */
public class MismatchHistoWalker extends BasicReadWalker<Integer, Integer> {

    /** Mismatch tallies indexed by 0-based offset into the aligned span; grown on demand. */
    protected long[] mismatchCounts = new long[0];

    /** Smallest NM value (inclusive) a read may have to be included in the histogram. */
    protected static final int MIN_TARGET_EDIT_DISTANCE = 5;

    /** Largest NM value (inclusive) a read may have to be included in the histogram. */
    protected static final int MAX_TARGET_EDIT_DISTANCE = 10;

    /**
     * @return the fixed display name of this walker
     */
    public String getName() {
        return "Mismatch_Histogram";
    }

    /**
     * Decides whether a read is handed to {@link #map}.
     *
     * @param context locus context of the read (not consulted here)
     * @param read    the read under consideration
     * @return true when the read is not flagged as unmapped
     */
    public boolean filter(LocusContext context, SAMRecord read) {
        // we only want aligned reads
        return !read.getReadUnmappedFlag();
    }

    /**
     * Compares the read against the reference span it aligns to and bumps the
     * histogram bin of every offset at which the two differ (case-insensitive).
     *
     * Reads are left out of the histogram (but still counted) when they have
     * no NM tag, when NM is outside the target range, when they do not consist
     * of exactly one alignment block, or when the read bases and the reference
     * span differ in length.
     *
     * @param context supplies the reference contig bases
     * @param read    the aligned read
     * @return always 1, so that the reduce step counts reads seen
     * @throws NumberFormatException if the NM tag is present but not an integer
     */
    public Integer map(LocusContext context, SAMRecord read) {
        // A mapped read is not guaranteed to carry NM; without it the edit
        // distance is unknown, so the read cannot be classified.
        Object nmTag = read.getAttribute("NM");
        if ( nmTag == null )
            return 1;

        int editDist = Integer.parseInt(nmTag.toString());
        if ( editDist < MIN_TARGET_EDIT_DISTANCE || editDist > MAX_TARGET_EDIT_DISTANCE )
            return 1;

        // ignore alignments with indels for now
        if ( read.getAlignmentBlocks().size() != 1 )
            return 1;

        // NOTE(review): start is shifted by -1 and stop is not, which presumably
        // matches a 0-based, end-exclusive Utils.subseq -- confirm against Utils.
        int start = read.getAlignmentStart() - 1;
        int stop = read.getAlignmentEnd();
        List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop);
        List<Byte> readBases = Utils.subseq(read.getReadBases());

        // A position-by-position comparison is only meaningful when both
        // sequences cover the same number of bases. This used to be an assert,
        // which is a no-op unless the JVM runs with -ea; a length mismatch
        // (presumably e.g. a read carrying clipped bases) would then be compared
        // out of register or run off the end of the shorter sequence.
        if ( refSeq.size() != readBases.size() )
            return 1;

        // Grow the histogram to fit the longest span seen so far; the slots
        // added by copyOf start out at zero.
        if ( refSeq.size() > mismatchCounts.length )
            mismatchCounts = java.util.Arrays.copyOf(mismatchCounts, refSeq.size());

        String refStr = Utils.baseList2string(refSeq);
        String readStr = Utils.baseList2string(readBases);
        for ( int i = 0; i < refStr.length(); i++ ) {
            // per-character upper-casing keeps the comparison case-insensitive
            // without depending on the default locale
            if ( Character.toUpperCase(refStr.charAt(i)) != Character.toUpperCase(readStr.charAt(i)) )
                mismatchCounts[i]++;
        }

        return 1;
    }

    /**
     * @return the starting value of the reduction, 0
     */
    public Integer reduceInit() { return 0; }

    /**
     * Adds one map result to the running total.
     *
     * @param value result of a single {@link #map} call
     * @param sum   total accumulated so far
     * @return value + sum
     */
    public Integer reduce(Integer value, Integer sum) {
        return value + sum;
    }

    /**
     * Prints one line per histogram bin to standard output: the 1-based
     * offset followed by the number of mismatches recorded at that offset.
     */
    public void onTraversalDone() {
        for ( int i = 0; i < mismatchCounts.length; i++ ) {
            System.out.printf("%3d %10d%n", (i+1), mismatchCounts[i]);
        }
    }
}