Now instantiates the set of alternative consenses to evaluate as a LinkedHashSet to guarantee iterator traversal order. The old implementation used a HashSet and exhibited unstable behavior when two alt consenses turned out to be equally good: depending on the run conditions (including the size of the interval set being cleaned??), either one could be seen first and selected as the 'best' one.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1734 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
663175e868
commit
bce2f0d7cf
|
|
@ -60,6 +60,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
private TreeSet<ComparableSAMRecord> readsToWrite = null;
|
||||
private TreeSet<ComparableSAMRecord> nextSetOfReadsToWrite = null;
|
||||
|
||||
private boolean debugOn = false;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
if ( LOD_THRESHOLD < 0.0 )
|
||||
|
|
@ -224,12 +226,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
||||
LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
||||
ArrayList<AlignedRead> leftMovedIndels = new ArrayList<AlignedRead>();
|
||||
HashSet<Consensus> altConsenses = new HashSet<Consensus>(); // list of alt consenses
|
||||
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
||||
int totalMismatchSum = 0;
|
||||
|
||||
|
||||
// decide which reads potentially need to be cleaned
|
||||
for ( SAMRecord read : reads ) {
|
||||
|
||||
// if ( debugOn ) {
|
||||
// System.out.println(read.getReadName()+" "+read.getCigarString()+" "+read.getAlignmentStart()+"-"+read.getAlignmentEnd());
|
||||
// System.out.println(reference.substring((int)(read.getAlignmentStart()-leftmostIndex),(int)(read.getAlignmentEnd()-leftmostIndex)));
|
||||
// System.out.println(read.getReadString());
|
||||
// }
|
||||
|
||||
// we currently can not deal with clipped reads correctly (or screwy record)
|
||||
if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) {
|
||||
refReads.add(read);
|
||||
|
|
@ -247,6 +256,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
}
|
||||
|
||||
int mismatchScore = mismatchQualitySumIgnoreCigar(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
|
||||
// if ( debugOn ) System.out.println("mismatchScore="+mismatchScore);
|
||||
|
||||
// if this doesn't match perfectly to the reference, let's try to clean it
|
||||
if ( mismatchScore > 0 ) {
|
||||
|
|
@ -256,11 +266,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// if it has an indel, let's see if that's the best consensus
|
||||
if ( numBlocks == 2 )
|
||||
altConsenses.add(createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getReadString()));
|
||||
else
|
||||
else {
|
||||
// if ( debugOn ) System.out.println("Going to test...");
|
||||
altAlignmentsToTest.add(aRead);
|
||||
}
|
||||
}
|
||||
// otherwise, we can emit it as is
|
||||
else {
|
||||
// if ( debugOn ) System.out.println("Emitting as is...");
|
||||
refReads.add(read);
|
||||
}
|
||||
}
|
||||
|
|
@ -271,8 +284,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// do a pairwise alignment against the reference
|
||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadString());
|
||||
if ( c != null)
|
||||
if ( c != null) {
|
||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||
altConsenses.add(c);
|
||||
} else {
|
||||
// if ( debugOn ) System.out.println("FAILED to create Alt consensus from SW");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// choose alternate consenses randomly
|
||||
|
|
@ -290,9 +307,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
|
||||
Consensus bestConsensus = null;
|
||||
Iterator<Consensus> iter = altConsenses.iterator();
|
||||
|
||||
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
|
||||
|
||||
while ( iter.hasNext() ) {
|
||||
Consensus consensus = iter.next();
|
||||
|
||||
// if ( debugOn ) System.out.println("Consensus: "+consensus.str);
|
||||
|
||||
for ( int j = 0; j < altReads.size(); j++ ) {
|
||||
AlignedRead toTest = altReads.get(j);
|
||||
Pair<Integer, Integer> altAlignment = findBestOffset(consensus.str, toTest);
|
||||
|
|
@ -458,6 +480,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
sb.append(reference.substring(refIdx));
|
||||
String altConsensus = sb.toString(); // alternative consensus sequence we just built from the current read
|
||||
|
||||
// if ( debugOn ) System.out.println("Alt consensus generated: "+altConsensus);
|
||||
|
||||
return new Consensus(altConsensus, c, indexOnRef);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue