From bce2f0d7cf2831a82c917ebc3456af943786b414 Mon Sep 17 00:00:00 2001 From: asivache Date: Mon, 28 Sep 2009 06:15:46 +0000 Subject: [PATCH] Now instantiates the list of alternative consenses to evaluate as LinkedHashSet to guarantee iterator traversal order. Old implementation used HashSet and exhibited unstable behavior when two alt consenses turned out to be equally good: depending on the run conditions (including size of the interval set being cleaned??), either one could be seen first as selected as the 'best' one git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1734 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/IntervalCleanerWalker.java | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java index 69e130cfa..a9df8cd11 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java @@ -60,6 +60,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker private TreeSet readsToWrite = null; private TreeSet nextSetOfReadsToWrite = null; + private boolean debugOn = false; + public void initialize() { if ( LOD_THRESHOLD < 0.0 ) @@ -224,12 +226,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker ArrayList altReads = new ArrayList(); // reads that don't perfectly match LinkedList altAlignmentsToTest = new LinkedList(); // should we try to make an alt consensus from the read? ArrayList leftMovedIndels = new ArrayList(); - HashSet altConsenses = new HashSet(); // list of alt consenses + Set altConsenses = new LinkedHashSet(); // list of alt consenses int totalMismatchSum = 0; + // decide which reads potentially need to be cleaned for ( SAMRecord read : reads ) { + // if ( debugOn ) { + // System.out.println(read.getReadName()+" "+read.getCigarString()+" "+read.getAlignmentStart()+"-"+read.getAlignmentEnd()); + // System.out.println(reference.substring((int)(read.getAlignmentStart()-leftmostIndex),(int)(read.getAlignmentEnd()-leftmostIndex))); + // System.out.println(read.getReadString()); + // } + // we currently can not deal with clipped reads correctly (or screwy record) if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) { refReads.add(read); @@ -247,6 +256,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker } int mismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex); + // if ( debugOn ) System.out.println("mismatchScore="+mismatchScore); // if this doesn't match perfectly to the reference, let's try to clean it if ( mismatchScore > 0 ) { @@ -256,11 +266,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker // if it has an indel, let's see if that's the best consensus if ( numBlocks == 2 ) altConsenses.add(createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getReadString())); - else + else { + // if ( debugOn ) System.out.println("Going to test..."); altAlignmentsToTest.add(aRead); + } } // otherwise, we can emit it as is else { + // if ( debugOn ) System.out.println("Emitting as is..."); refReads.add(read); } } @@ -271,8 +284,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker // do a pairwise alignment against the reference SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND); Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadString()); - if ( c != null) + if ( c != null) { + // if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ; altConsenses.add(c); + } else { + // if ( debugOn ) System.out.println("FAILED to create Alt consensus from SW"); + } } } else { // choose alternate consenses randomly @@ -290,9 +307,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker Consensus bestConsensus = null; Iterator iter = altConsenses.iterator(); + + // if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n"); + while ( iter.hasNext() ) { Consensus consensus = iter.next(); + // if ( debugOn ) System.out.println("Consensus: "+consensus.str); + for ( int j = 0; j < altReads.size(); j++ ) { AlignedRead toTest = altReads.get(j); Pair altAlignment = findBestOffset(consensus.str, toTest); @@ -458,6 +480,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker sb.append(reference.substring(refIdx)); String altConsensus = sb.toString(); // alternative consensus sequence we just built from the cuurent read + // if ( debugOn ) System.out.println("Alt consensus generated: "+altConsensus); + return new Consensus(altConsensus, c, indexOnRef); }