Added walker to print out a histogram of where mismatches occur in alignments
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@89 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1096bbd4d9
commit
45d2a9acd8
|
|
@ -19,8 +19,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public void initialize() {
|
||||
for ( int i = 0; i < this.alignCounts.length; i++ ) {
|
||||
this.alignCounts[i] = 0;
|
||||
for ( int i = 0; i < alignCounts.length; i++ ) {
|
||||
alignCounts[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
|||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
//System.out.println(read.getAttribute("NM"));
|
||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||
this.alignCounts[editDist]++;
|
||||
if (editDist <= 50)
|
||||
alignCounts[editDist]++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
|||
|
||||
public void onTraversalDone() {
|
||||
int curTotal = 0;
|
||||
for ( int i = 0; i < this.alignCounts.length; i++ ) {
|
||||
for ( int i = 0; i < alignCounts.length; i++ ) {
|
||||
curTotal += alignCounts[i];
|
||||
System.out.printf("%3d %10d%n", i, curTotal);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import static java.lang.reflect.Array.*;
|
||||
|
||||
public class MismatchHistoWalker extends BasicReadWalker<Integer, Integer> {
|
||||
|
||||
protected long[] mismatchCounts = new long[0];
|
||||
protected final int MIN_TARGET_EDIT_DISTANCE = 5;
|
||||
protected final int MAX_TARGET_EDIT_DISTANCE = 10;
|
||||
|
||||
public String getName() {
|
||||
return "Mismatch_Histogram";
|
||||
}
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
// we only want aligned reads
|
||||
return !read.getReadUnmappedFlag();
|
||||
}
|
||||
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
|
||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||
|
||||
// ignore alignments with indels for now
|
||||
if ( read.getAlignmentBlocks().size() == 1 &&
|
||||
editDist >= MIN_TARGET_EDIT_DISTANCE &&
|
||||
editDist <= MAX_TARGET_EDIT_DISTANCE ) {
|
||||
|
||||
ReferenceSequence refseq = context.getReferenceContig();
|
||||
|
||||
int start = read.getAlignmentStart()-1;
|
||||
int stop = read.getAlignmentEnd();
|
||||
|
||||
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop);
|
||||
List<Byte> readBases = Utils.subseq(read.getReadBases());
|
||||
|
||||
assert(refSeq.size() == readBases.size());
|
||||
|
||||
// it's actually faster to reallocate a resized array than to use ArrayLists...
|
||||
if ( refSeq.size() > mismatchCounts.length ) {
|
||||
int oldLength = mismatchCounts.length;
|
||||
mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size());
|
||||
for ( int i = oldLength; i < refSeq.size(); i++ )
|
||||
mismatchCounts[i] = 0;
|
||||
}
|
||||
|
||||
String refStr = Utils.baseList2string(refSeq).toUpperCase();
|
||||
String readStr = Utils.baseList2string(readBases).toUpperCase();
|
||||
for ( int i = 0; i < refStr.length(); i++) {
|
||||
if ( refStr.charAt(i) != readStr.charAt(i) )
|
||||
mismatchCounts[i]++;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return value + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone() {
|
||||
for ( int i = 0; i < mismatchCounts.length; i++ ) {
|
||||
System.out.printf("%3d %10d%n", (i+1), mismatchCounts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static Object resizeArray (Object oldArray, int newSize) {
|
||||
int oldSize = getLength(oldArray);
|
||||
Class elementType = oldArray.getClass().getComponentType();
|
||||
Object newArray = newInstance(elementType,newSize);
|
||||
int preserveLength = Math.min(oldSize,newSize);
|
||||
if (preserveLength > 0)
|
||||
System.arraycopy (oldArray,0,newArray,0,preserveLength);
|
||||
return newArray;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue