Now instantiates the list of alternative consenses to evaluate as LinkedHashSet to guarantee iterator traversal order. Old implementation used HashSet and exhibited unstable behavior when two alt consenses turned out to be equally good: depending on the run conditions (including size of the interval set being cleaned??), either one could be seen first as selected as the 'best' one

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1734 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-09-28 06:15:46 +00:00
parent 663175e868
commit bce2f0d7cf
1 changed files with 27 additions and 3 deletions

View File

@ -60,6 +60,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
private TreeSet<ComparableSAMRecord> readsToWrite = null;
private TreeSet<ComparableSAMRecord> nextSetOfReadsToWrite = null;
private boolean debugOn = false;
public void initialize() {
if ( LOD_THRESHOLD < 0.0 )
@ -224,12 +226,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
ArrayList<AlignedRead> leftMovedIndels = new ArrayList<AlignedRead>();
HashSet<Consensus> altConsenses = new HashSet<Consensus>(); // list of alt consenses
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
int totalMismatchSum = 0;
// decide which reads potentially need to be cleaned
for ( SAMRecord read : reads ) {
// if ( debugOn ) {
// System.out.println(read.getReadName()+" "+read.getCigarString()+" "+read.getAlignmentStart()+"-"+read.getAlignmentEnd());
// System.out.println(reference.substring((int)(read.getAlignmentStart()-leftmostIndex),(int)(read.getAlignmentEnd()-leftmostIndex)));
// System.out.println(read.getReadString());
// }
// we currently can not deal with clipped reads correctly (or screwy record)
if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) {
refReads.add(read);
@ -247,6 +256,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
}
int mismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
// if ( debugOn ) System.out.println("mismatchScore="+mismatchScore);
// if this doesn't match perfectly to the reference, let's try to clean it
if ( mismatchScore > 0 ) {
@ -256,11 +266,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// if it has an indel, let's see if that's the best consensus
if ( numBlocks == 2 )
altConsenses.add(createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getReadString()));
else
else {
// if ( debugOn ) System.out.println("Going to test...");
altAlignmentsToTest.add(aRead);
}
}
// otherwise, we can emit it as is
else {
// if ( debugOn ) System.out.println("Emitting as is...");
refReads.add(read);
}
}
@ -271,8 +284,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// do a pairwise alignment against the reference
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadString());
if ( c != null)
if ( c != null) {
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
altConsenses.add(c);
} else {
// if ( debugOn ) System.out.println("FAILED to create Alt consensus from SW");
}
}
} else {
// choose alternate consenses randomly
@ -290,9 +307,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
Consensus bestConsensus = null;
Iterator<Consensus> iter = altConsenses.iterator();
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
while ( iter.hasNext() ) {
Consensus consensus = iter.next();
// if ( debugOn ) System.out.println("Consensus: "+consensus.str);
for ( int j = 0; j < altReads.size(); j++ ) {
AlignedRead toTest = altReads.get(j);
Pair<Integer, Integer> altAlignment = findBestOffset(consensus.str, toTest);
@ -458,6 +480,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
sb.append(reference.substring(refIdx));
String altConsensus = sb.toString(); // alternative consensus sequence we just built from the cuurent read
// if ( debugOn ) System.out.println("Alt consensus generated: "+altConsensus);
return new Consensus(altConsensus, c, indexOnRef);
}