From 45d2a9acd8a562dd5afcf9d97d70b53cebceb286 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 18 Mar 2009 19:46:42 +0000 Subject: [PATCH] Added walker to print out a histogram of where mismatches occur in alignments git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@89 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/AlignedReadsHistoWalker.java | 9 +- .../gatk/walkers/MismatchHistoWalker.java | 87 +++++++++++++++++++ 2 files changed, 92 insertions(+), 4 deletions(-) create mode 100755 playground/java/src/org/broadinstitute/sting/gatk/walkers/MismatchHistoWalker.java diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlignedReadsHistoWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlignedReadsHistoWalker.java index 982cf7f12..01be7686c 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlignedReadsHistoWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlignedReadsHistoWalker.java @@ -19,8 +19,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker { } public void initialize() { - for ( int i = 0; i < this.alignCounts.length; i++ ) { - this.alignCounts[i] = 0; + for ( int i = 0; i < alignCounts.length; i++ ) { + alignCounts[i] = 0; } } @@ -36,7 +36,8 @@ public class AlignedReadsHistoWalker extends BasicReadWalker { public Integer map(LocusContext context, SAMRecord read) { //System.out.println(read.getAttribute("NM")); int editDist = Integer.parseInt(read.getAttribute("NM").toString()); - this.alignCounts[editDist]++; + if (editDist <= 50) + alignCounts[editDist]++; return 1; } @@ -48,7 +49,7 @@ public class AlignedReadsHistoWalker extends BasicReadWalker { public void onTraversalDone() { int curTotal = 0; - for ( int i = 0; i < this.alignCounts.length; i++ ) { + for ( int i = 0; i < alignCounts.length; i++ ) { curTotal += alignCounts[i]; System.out.printf("%3d %10d%n", i, curTotal); } diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/MismatchHistoWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/MismatchHistoWalker.java new file mode 100755 index 000000000..a2fb8e357 --- /dev/null +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/MismatchHistoWalker.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.utils.Utils; +import edu.mit.broad.picard.reference.ReferenceSequence; + +import java.util.Iterator; +import java.util.List; +import static java.lang.reflect.Array.*; + +public class MismatchHistoWalker extends BasicReadWalker { + + protected long[] mismatchCounts = new long[0]; + protected final int MIN_TARGET_EDIT_DISTANCE = 5; + protected final int MAX_TARGET_EDIT_DISTANCE = 10; + + public String getName() { + return "Mismatch_Histogram"; + } + + // Do we actually want to operate on the context? + public boolean filter(LocusContext context, SAMRecord read) { + // we only want aligned reads + return !read.getReadUnmappedFlag(); + } + + public Integer map(LocusContext context, SAMRecord read) { + + int editDist = Integer.parseInt(read.getAttribute("NM").toString()); + + // ignore alignments with indels for now + if ( read.getAlignmentBlocks().size() == 1 && + editDist >= MIN_TARGET_EDIT_DISTANCE && + editDist <= MAX_TARGET_EDIT_DISTANCE ) { + + ReferenceSequence refseq = context.getReferenceContig(); + + int start = read.getAlignmentStart()-1; + int stop = read.getAlignmentEnd(); + + List refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop); + List readBases = Utils.subseq(read.getReadBases()); + + assert(refSeq.size() == readBases.size()); + + // it's actually faster to reallocate a resized array than to use ArrayLists... + if ( refSeq.size() > mismatchCounts.length ) { + int oldLength = mismatchCounts.length; + mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size()); + for ( int i = oldLength; i < refSeq.size(); i++ ) + mismatchCounts[i] = 0; + } + + String refStr = Utils.baseList2string(refSeq).toUpperCase(); + String readStr = Utils.baseList2string(readBases).toUpperCase(); + for ( int i = 0; i < refStr.length(); i++) { + if ( refStr.charAt(i) != readStr.charAt(i) ) + mismatchCounts[i]++; + } + } + + return 1; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + public void onTraversalDone() { + for ( int i = 0; i < mismatchCounts.length; i++ ) { + System.out.printf("%3d %10d%n", (i+1), mismatchCounts[i]); + } + } + + private static Object resizeArray (Object oldArray, int newSize) { + int oldSize = getLength(oldArray); + Class elementType = oldArray.getClass().getComponentType(); + Object newArray = newInstance(elementType,newSize); + int preserveLength = Math.min(oldSize,newSize); + if (preserveLength > 0) + System.arraycopy (oldArray,0,newArray,0,preserveLength); + return newArray; + } +} \ No newline at end of file