Added walker to print out a histogram of where mismatches occur in alignments
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@89 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1096bbd4d9
commit
45d2a9acd8
|
|
@ -19,8 +19,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
for ( int i = 0; i < this.alignCounts.length; i++ ) {
|
for ( int i = 0; i < alignCounts.length; i++ ) {
|
||||||
this.alignCounts[i] = 0;
|
alignCounts[i] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -36,7 +36,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
//System.out.println(read.getAttribute("NM"));
|
//System.out.println(read.getAttribute("NM"));
|
||||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||||
this.alignCounts[editDist]++;
|
if (editDist <= 50)
|
||||||
|
alignCounts[editDist]++;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,7 +49,7 @@ public class AlignedReadsHistoWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
public void onTraversalDone() {
|
public void onTraversalDone() {
|
||||||
int curTotal = 0;
|
int curTotal = 0;
|
||||||
for ( int i = 0; i < this.alignCounts.length; i++ ) {
|
for ( int i = 0; i < alignCounts.length; i++ ) {
|
||||||
curTotal += alignCounts[i];
|
curTotal += alignCounts[i];
|
||||||
System.out.printf("%3d %10d%n", i, curTotal);
|
System.out.printf("%3d %10d%n", i, curTotal);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,87 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import static java.lang.reflect.Array.*;
|
||||||
|
|
||||||
|
public class MismatchHistoWalker extends BasicReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
protected long[] mismatchCounts = new long[0];
|
||||||
|
protected final int MIN_TARGET_EDIT_DISTANCE = 5;
|
||||||
|
protected final int MAX_TARGET_EDIT_DISTANCE = 10;
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Mismatch_Histogram";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do we actually want to operate on the context?
|
||||||
|
public boolean filter(LocusContext context, SAMRecord read) {
|
||||||
|
// we only want aligned reads
|
||||||
|
return !read.getReadUnmappedFlag();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
||||||
|
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||||
|
|
||||||
|
// ignore alignments with indels for now
|
||||||
|
if ( read.getAlignmentBlocks().size() == 1 &&
|
||||||
|
editDist >= MIN_TARGET_EDIT_DISTANCE &&
|
||||||
|
editDist <= MAX_TARGET_EDIT_DISTANCE ) {
|
||||||
|
|
||||||
|
ReferenceSequence refseq = context.getReferenceContig();
|
||||||
|
|
||||||
|
int start = read.getAlignmentStart()-1;
|
||||||
|
int stop = read.getAlignmentEnd();
|
||||||
|
|
||||||
|
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop);
|
||||||
|
List<Byte> readBases = Utils.subseq(read.getReadBases());
|
||||||
|
|
||||||
|
assert(refSeq.size() == readBases.size());
|
||||||
|
|
||||||
|
// it's actually faster to reallocate a resized array than to use ArrayLists...
|
||||||
|
if ( refSeq.size() > mismatchCounts.length ) {
|
||||||
|
int oldLength = mismatchCounts.length;
|
||||||
|
mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size());
|
||||||
|
for ( int i = oldLength; i < refSeq.size(); i++ )
|
||||||
|
mismatchCounts[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
String refStr = Utils.baseList2string(refSeq).toUpperCase();
|
||||||
|
String readStr = Utils.baseList2string(readBases).toUpperCase();
|
||||||
|
for ( int i = 0; i < refStr.length(); i++) {
|
||||||
|
if ( refStr.charAt(i) != readStr.charAt(i) )
|
||||||
|
mismatchCounts[i]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone() {
|
||||||
|
for ( int i = 0; i < mismatchCounts.length; i++ ) {
|
||||||
|
System.out.printf("%3d %10d%n", (i+1), mismatchCounts[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Object resizeArray (Object oldArray, int newSize) {
|
||||||
|
int oldSize = getLength(oldArray);
|
||||||
|
Class elementType = oldArray.getClass().getComponentType();
|
||||||
|
Object newArray = newInstance(elementType,newSize);
|
||||||
|
int preserveLength = Math.min(oldSize,newSize);
|
||||||
|
if (preserveLength > 0)
|
||||||
|
System.arraycopy (oldArray,0,newArray,0,preserveLength);
|
||||||
|
return newArray;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue