From f9be175f444e84da0150b3b6af5b7edee77c32b1 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 10 Jun 2009 17:43:22 +0000 Subject: [PATCH] Be smart about trying alternate consenses: try prior indels first and only 1 instance of them git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@971 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/IntervalCleanerWalker.java | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index 959a51bc9..da2e8e84c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -196,7 +196,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker return sum; } - private boolean readIsClipped(SAMRecord read) { + private static boolean readIsClipped(SAMRecord read) { final Cigar c = read.getCigar(); final int n = c.numCigarElements(); if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S || @@ -204,12 +204,26 @@ public class IntervalCleanerWalker extends LocusWindowWalker return false; } + private static String hashIndel(AlignedRead read) { + final Cigar c = read.getCigar(); + final int start = read.getAlignmentStart() + c.getCigarElement(0).getLength() - 1; + StringBuffer sb = new StringBuffer(); + sb.append(start); + if ( c.getCigarElement(1).getOperator() == CigarOperator.D ) + sb.append("D"); + else + sb.append("I"); + sb.append(c.getCigarElement(1).getLength()); + return sb.toString(); + } + private void clean(List reads, String reference, GenomeLoc interval) { long leftmostIndex = interval.getStart(); - ArrayList refReads = new ArrayList(); - ArrayList altReads = new ArrayList(); - ArrayList altAlignmentsToTest = new ArrayList(); + ArrayList refReads = new ArrayList(); // reads that perfectly match ref + LinkedList altReads = new LinkedList(); // reads that don't perfectly match + LinkedList altAlignmentsToTest = new LinkedList(); // should we try to make an alt consensus from the corresponding read in altReads? + HashSet priorIndelsToTest = new HashSet(); // list of indels seen in the prior alignments to test (so we don't duplicate) int totalMismatchSum = 0; // decide which reads potentially need to be cleaned @@ -236,11 +250,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker totalMismatchSum += mismatchScore; aRead.setMismatchScoreToReference(mismatchScore); } - // otherwise, if it has an indel, let's see if that's the best consensus - else if ( numBlocks == 2 ) { + // otherwise, if it has an indel, let's see if that's the best consensus (one instance per indel though) + else if ( numBlocks == 2 && priorIndelsToTest.add(hashIndel(aRead))) { aRead.doNotRealign(); - altReads.add(aRead); - altAlignmentsToTest.add(true); + altReads.addFirst(aRead); + altAlignmentsToTest.addFirst(true); } // otherwise, we can emit it as is else {